7364 lines
236 KiB
Diff
7364 lines
236 KiB
Diff
|
|
From aec213c228426fbad3cd9d4038dffecaf92947bf Mon Sep 17 00:00:00 2001
|
||
|
|
From: Ron Yorston <rmy@pobox.com>
|
||
|
|
Date: Wed, 27 Jan 2021 11:19:14 +0000
|
||
|
|
Subject: [PATCH 01/61] awk: allow printf('%c') to output NUL, closes 13486
|
||
|
|
|
||
|
|
Treat the output of printf as binary rather than a null-terminated
|
||
|
|
string so that NUL characters can be output.
|
||
|
|
|
||
|
|
This is considered to be a GNU extension, though it's also available
|
||
|
|
in mawk and FreeBSD's awk.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3487 3504 +17
|
||
|
|
awk_printf 504 519 +15
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/0 up/down: 32/0) Total: 32 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Ron Yorston <rmy@pobox.com>
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 18 +++++++++++++++---
|
||
|
|
testsuite/awk.tests | 5 +++++
|
||
|
|
2 files changed, 20 insertions(+), 3 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 2c15f9e4e..b4f6a3741 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2155,7 +2155,10 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i
|
||
|
|
}
|
||
|
|
|
||
|
|
/* formatted output into an allocated buffer, return ptr to buffer */
|
||
|
|
-static char *awk_printf(node *n)
|
||
|
|
+#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
+# define awk_printf(a, b) awk_printf(a)
|
||
|
|
+#endif
|
||
|
|
+static char *awk_printf(node *n, int *len)
|
||
|
|
{
|
||
|
|
char *b = NULL;
|
||
|
|
char *fmt, *s, *f;
|
||
|
|
@@ -2209,6 +2212,10 @@ static char *awk_printf(node *n)
|
||
|
|
nvfree(v);
|
||
|
|
b = xrealloc(b, i + 1);
|
||
|
|
b[i] = '\0';
|
||
|
|
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
+ if (len)
|
||
|
|
+ *len = i;
|
||
|
|
+#endif
|
||
|
|
return b;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2666,6 +2673,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_PRINT ):
|
||
|
|
case XC( OC_PRINTF ): {
|
||
|
|
FILE *F = stdout;
|
||
|
|
+ IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
|
||
|
|
|
||
|
|
if (op->r.n) {
|
||
|
|
rstream *rsm = newfile(R.s);
|
||
|
|
@@ -2703,8 +2711,12 @@ static var *evaluate(node *op, var *res)
|
||
|
|
fputs(getvar_s(intvar[ORS]), F);
|
||
|
|
|
||
|
|
} else { /* OC_PRINTF */
|
||
|
|
- char *s = awk_printf(op1);
|
||
|
|
+ char *s = awk_printf(op1, &len);
|
||
|
|
+#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
+ fwrite(s, len, 1, F);
|
||
|
|
+#else
|
||
|
|
fputs(s, F);
|
||
|
|
+#endif
|
||
|
|
free(s);
|
||
|
|
}
|
||
|
|
fflush(F);
|
||
|
|
@@ -2978,7 +2990,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_SPRINTF ):
|
||
|
|
- setvar_p(res, awk_printf(op1));
|
||
|
|
+ setvar_p(res, awk_printf(op1, NULL));
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_UNARY ): {
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 92c83d719..cf9b722dc 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -383,6 +383,11 @@ testing "awk errors on missing delete arg" \
|
||
|
|
"awk -e '{delete}' 2>&1" "awk: cmd. line:1: Too few arguments\n" "" ""
|
||
|
|
SKIP=
|
||
|
|
|
||
|
|
+optional FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
+testing "awk printf('%c') can output NUL" \
|
||
|
|
+ "awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
|
||
|
|
+SKIP=
|
||
|
|
+
|
||
|
|
# testing "description" "command" "result" "infile" "stdin"
|
||
|
|
testing 'awk negative field access' \
|
||
|
|
'awk 2>&1 -- '\''{ $(-1) }'\' \
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 9dcd2d5cc91bde2d6cdd038ed23408057d6f6429 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 16 Jun 2021 09:18:08 +0200
|
||
|
|
Subject: [PATCH 02/61] awk: fix use-after-free in "$BIGNUM1 $BIGGERNUM2"
|
||
|
|
concat op
|
||
|
|
|
||
|
|
Second reference to a field reallocs/moves Fields[] array, but first ref
|
||
|
|
still tries to use the element where it was before move.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
fsrealloc 94 106 +12
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 85 ++++++++++++++++++++++++++++++++++++++++++---------
|
||
|
|
1 file changed, 71 insertions(+), 14 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index b4f6a3741..48836298c 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1745,12 +1745,22 @@ static char* qrealloc(char *b, int n, int *size)
|
||
|
|
/* resize field storage space */
|
||
|
|
static void fsrealloc(int size)
|
||
|
|
{
|
||
|
|
- int i;
|
||
|
|
+ int i, newsize;
|
||
|
|
|
||
|
|
if (size >= maxfields) {
|
||
|
|
+ /* Sanity cap, easier than catering for overflows */
|
||
|
|
+ if (size > 0xffffff)
|
||
|
|
+ bb_die_memory_exhausted();
|
||
|
|
+
|
||
|
|
i = maxfields;
|
||
|
|
maxfields = size + 16;
|
||
|
|
- Fields = xrealloc(Fields, maxfields * sizeof(Fields[0]));
|
||
|
|
+
|
||
|
|
+ newsize = maxfields * sizeof(Fields[0]);
|
||
|
|
+ debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
|
||
|
|
+ Fields = xrealloc(Fields, newsize);
|
||
|
|
+ debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
|
||
|
|
+ /* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
|
||
|
|
+
|
||
|
|
for (; i < maxfields; i++) {
|
||
|
|
Fields[i].type = VF_SPECIAL;
|
||
|
|
Fields[i].string = NULL;
|
||
|
|
@@ -2614,20 +2624,30 @@ static var *evaluate(node *op, var *res)
|
||
|
|
/* execute inevitable things */
|
||
|
|
if (opinfo & OF_RES1)
|
||
|
|
L.v = evaluate(op1, v1);
|
||
|
|
- if (opinfo & OF_RES2)
|
||
|
|
- R.v = evaluate(op->r.n, v1+1);
|
||
|
|
if (opinfo & OF_STR1) {
|
||
|
|
L.s = getvar_s(L.v);
|
||
|
|
debug_printf_eval("L.s:'%s'\n", L.s);
|
||
|
|
}
|
||
|
|
- if (opinfo & OF_STR2) {
|
||
|
|
- R.s = getvar_s(R.v);
|
||
|
|
- debug_printf_eval("R.s:'%s'\n", R.s);
|
||
|
|
- }
|
||
|
|
if (opinfo & OF_NUM1) {
|
||
|
|
L_d = getvar_i(L.v);
|
||
|
|
debug_printf_eval("L_d:%f\n", L_d);
|
||
|
|
}
|
||
|
|
+ /* NB: Must get string/numeric values of L (done above)
|
||
|
|
+ * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
|
||
|
|
+ * and right one is large, then L.v points to Fields[NNN1],
|
||
|
|
+ * second evaluate() reallocates and moves (!) Fields[],
|
||
|
|
+ * R.v points to Fields[NNN2] but L.v now points to freed mem!
|
||
|
|
+ * (Seen trying to evaluate "$444 $44444")
|
||
|
|
+ */
|
||
|
|
+ if (opinfo & OF_RES2) {
|
||
|
|
+ R.v = evaluate(op->r.n, v1+1);
|
||
|
|
+ //TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
|
||
|
|
+ //L.v = NULL;
|
||
|
|
+ }
|
||
|
|
+ if (opinfo & OF_STR2) {
|
||
|
|
+ R.s = getvar_s(R.v);
|
||
|
|
+ debug_printf_eval("R.s:'%s'\n", R.s);
|
||
|
|
+ }
|
||
|
|
|
||
|
|
debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
|
||
|
|
switch (XC(opinfo & OPCLSMASK)) {
|
||
|
|
@@ -2636,6 +2656,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* test pattern */
|
||
|
|
case XC( OC_TEST ):
|
||
|
|
+ debug_printf_eval("TEST\n");
|
||
|
|
if ((op1->info & OPCLSMASK) == OC_COMMA) {
|
||
|
|
/* it's range pattern */
|
||
|
|
if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
|
||
|
|
@@ -2653,25 +2674,32 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* just evaluate an expression, also used as unconditional jump */
|
||
|
|
case XC( OC_EXEC ):
|
||
|
|
+ debug_printf_eval("EXEC\n");
|
||
|
|
break;
|
||
|
|
|
||
|
|
/* branch, used in if-else and various loops */
|
||
|
|
case XC( OC_BR ):
|
||
|
|
+ debug_printf_eval("BR\n");
|
||
|
|
op = istrue(L.v) ? op->a.n : op->r.n;
|
||
|
|
break;
|
||
|
|
|
||
|
|
/* initialize for-in loop */
|
||
|
|
case XC( OC_WALKINIT ):
|
||
|
|
+ debug_printf_eval("WALKINIT\n");
|
||
|
|
hashwalk_init(L.v, iamarray(R.v));
|
||
|
|
break;
|
||
|
|
|
||
|
|
/* get next array item */
|
||
|
|
case XC( OC_WALKNEXT ):
|
||
|
|
+ debug_printf_eval("WALKNEXT\n");
|
||
|
|
op = hashwalk_next(L.v) ? op->a.n : op->r.n;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_PRINT ):
|
||
|
|
- case XC( OC_PRINTF ): {
|
||
|
|
+ debug_printf_eval("PRINT /\n");
|
||
|
|
+ case XC( OC_PRINTF ):
|
||
|
|
+ debug_printf_eval("PRINTF\n");
|
||
|
|
+ {
|
||
|
|
FILE *F = stdout;
|
||
|
|
IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
|
||
|
|
|
||
|
|
@@ -2726,22 +2754,28 @@ static var *evaluate(node *op, var *res)
|
||
|
|
/* case XC( OC_DELETE ): - moved to happen before arg evaluation */
|
||
|
|
|
||
|
|
case XC( OC_NEWSOURCE ):
|
||
|
|
+ debug_printf_eval("NEWSOURCE\n");
|
||
|
|
g_progname = op->l.new_progname;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_RETURN ):
|
||
|
|
+ debug_printf_eval("RETURN\n");
|
||
|
|
copyvar(res, L.v);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_NEXTFILE ):
|
||
|
|
+ debug_printf_eval("NEXTFILE\n");
|
||
|
|
nextfile = TRUE;
|
||
|
|
case XC( OC_NEXT ):
|
||
|
|
+ debug_printf_eval("NEXT\n");
|
||
|
|
nextrec = TRUE;
|
||
|
|
case XC( OC_DONE ):
|
||
|
|
+ debug_printf_eval("DONE\n");
|
||
|
|
clrvar(res);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_EXIT ):
|
||
|
|
+ debug_printf_eval("EXIT\n");
|
||
|
|
awk_exit(L_d);
|
||
|
|
|
||
|
|
/* -- recursive node type -- */
|
||
|
|
@@ -2761,15 +2795,18 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_IN ):
|
||
|
|
+ debug_printf_eval("IN\n");
|
||
|
|
setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_REGEXP ):
|
||
|
|
+ debug_printf_eval("REGEXP\n");
|
||
|
|
op1 = op;
|
||
|
|
L.s = getvar_s(intvar[F0]);
|
||
|
|
goto re_cont;
|
||
|
|
|
||
|
|
case XC( OC_MATCH ):
|
||
|
|
+ debug_printf_eval("MATCH\n");
|
||
|
|
op1 = op->r.n;
|
||
|
|
re_cont:
|
||
|
|
{
|
||
|
|
@@ -2795,6 +2832,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_TERNARY ):
|
||
|
|
+ debug_printf_eval("TERNARY\n");
|
||
|
|
if ((op->r.n->info & OPCLSMASK) != OC_COLON)
|
||
|
|
syntax_error(EMSG_POSSIBLE_ERROR);
|
||
|
|
res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
|
||
|
|
@@ -2803,6 +2841,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_FUNC ): {
|
||
|
|
var *vbeg, *v;
|
||
|
|
const char *sv_progname;
|
||
|
|
+ debug_printf_eval("FUNC\n");
|
||
|
|
|
||
|
|
/* The body might be empty, still has to eval the args */
|
||
|
|
if (!op->r.n->info && !op->r.f->body.first)
|
||
|
|
@@ -2832,7 +2871,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
|
||
|
|
case XC( OC_GETLINE ):
|
||
|
|
- case XC( OC_PGETLINE ): {
|
||
|
|
+ debug_printf_eval("GETLINE /\n");
|
||
|
|
+ case XC( OC_PGETLINE ):
|
||
|
|
+ debug_printf_eval("PGETLINE\n");
|
||
|
|
+ {
|
||
|
|
rstream *rsm;
|
||
|
|
int i;
|
||
|
|
|
||
|
|
@@ -2873,6 +2915,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
/* simple builtins */
|
||
|
|
case XC( OC_FBLTIN ): {
|
||
|
|
double R_d = R_d; /* for compiler */
|
||
|
|
+ debug_printf_eval("FBLTIN\n");
|
||
|
|
|
||
|
|
switch (opn) {
|
||
|
|
case F_in:
|
||
|
|
@@ -2986,14 +3029,18 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
|
||
|
|
case XC( OC_BUILTIN ):
|
||
|
|
+ debug_printf_eval("BUILTIN\n");
|
||
|
|
res = exec_builtin(op, res);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_SPRINTF ):
|
||
|
|
+ debug_printf_eval("SPRINTF\n");
|
||
|
|
setvar_p(res, awk_printf(op1, NULL));
|
||
|
|
break;
|
||
|
|
|
||
|
|
- case XC( OC_UNARY ): {
|
||
|
|
+ case XC( OC_UNARY ):
|
||
|
|
+ debug_printf_eval("UNARY\n");
|
||
|
|
+ {
|
||
|
|
double Ld, R_d;
|
||
|
|
|
||
|
|
Ld = R_d = getvar_i(R.v);
|
||
|
|
@@ -3023,7 +3070,9 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
- case XC( OC_FIELD ): {
|
||
|
|
+ case XC( OC_FIELD ):
|
||
|
|
+ debug_printf_eval("FIELD\n");
|
||
|
|
+ {
|
||
|
|
int i = (int)getvar_i(R.v);
|
||
|
|
if (i < 0)
|
||
|
|
syntax_error(EMSG_NEGATIVE_FIELD);
|
||
|
|
@@ -3040,8 +3089,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* concatenation (" ") and index joining (",") */
|
||
|
|
case XC( OC_CONCAT ):
|
||
|
|
+ debug_printf_eval("CONCAT /\n");
|
||
|
|
case XC( OC_COMMA ): {
|
||
|
|
const char *sep = "";
|
||
|
|
+ debug_printf_eval("COMMA\n");
|
||
|
|
if ((opinfo & OPCLSMASK) == OC_COMMA)
|
||
|
|
sep = getvar_s(intvar[SUBSEP]);
|
||
|
|
setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
|
||
|
|
@@ -3049,17 +3100,22 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
|
||
|
|
case XC( OC_LAND ):
|
||
|
|
+ debug_printf_eval("LAND\n");
|
||
|
|
setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_LOR ):
|
||
|
|
+ debug_printf_eval("LOR\n");
|
||
|
|
setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_BINARY ):
|
||
|
|
- case XC( OC_REPLACE ): {
|
||
|
|
+ debug_printf_eval("BINARY /\n");
|
||
|
|
+ case XC( OC_REPLACE ):
|
||
|
|
+ debug_printf_eval("REPLACE\n");
|
||
|
|
+ {
|
||
|
|
double R_d = getvar_i(R.v);
|
||
|
|
- debug_printf_eval("BINARY/REPLACE: R_d:%f opn:%c\n", R_d, opn);
|
||
|
|
+ debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
|
||
|
|
switch (opn) {
|
||
|
|
case '+':
|
||
|
|
L_d += R_d;
|
||
|
|
@@ -3095,6 +3151,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_COMPARE ): {
|
||
|
|
int i = i; /* for compiler */
|
||
|
|
double Ld;
|
||
|
|
+ debug_printf_eval("COMPARE\n");
|
||
|
|
|
||
|
|
if (is_numeric(L.v) && is_numeric(R.v)) {
|
||
|
|
Ld = getvar_i(L.v) - getvar_i(R.v);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 1d5e5492dd8368ee3870bcd390754aa7c3f8956c Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 18 Jun 2021 16:35:27 +0200
|
||
|
|
Subject: [PATCH 03/61] awk: after preinc/dec, only allow variable, field ref,
|
||
|
|
array ref, or another preinc/dec
|
||
|
|
|
||
|
|
Accepting nonsense like "--4", and even "-- -4" is confusing.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_expr 917 938 +21
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 87 ++++++++++++++++++++++++++++++++++++++++-----------
|
||
|
|
1 file changed, 69 insertions(+), 18 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 48836298c..2563722f9 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -66,6 +66,8 @@
|
||
|
|
#endif
|
||
|
|
#ifndef debug_printf_parse
|
||
|
|
# define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
|
||
|
|
+#else
|
||
|
|
+# define debug_parse_print_tc(...) ((void)0)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
|
||
|
|
@@ -210,13 +212,13 @@ typedef struct tsplitter_s {
|
||
|
|
#define TC_SEQTERM (1 << 1) /* ) */
|
||
|
|
#define TC_REGEXP (1 << 2) /* /.../ */
|
||
|
|
#define TC_OUTRDR (1 << 3) /* | > >> */
|
||
|
|
-#define TC_UOPPOST (1 << 4) /* unary postfix operator */
|
||
|
|
-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator */
|
||
|
|
+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
|
||
|
|
+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
|
||
|
|
#define TC_BINOPX (1 << 6) /* two-opnd operator */
|
||
|
|
#define TC_IN (1 << 7)
|
||
|
|
#define TC_COMMA (1 << 8)
|
||
|
|
#define TC_PIPE (1 << 9) /* input redirection pipe */
|
||
|
|
-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator */
|
||
|
|
+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
|
||
|
|
#define TC_ARRTERM (1 << 11) /* ] */
|
||
|
|
#define TC_GRPSTART (1 << 12) /* { */
|
||
|
|
#define TC_GRPTERM (1 << 13) /* } */
|
||
|
|
@@ -243,14 +245,51 @@ typedef struct tsplitter_s {
|
||
|
|
#define TC_STRING (1 << 29)
|
||
|
|
#define TC_NUMBER (1 << 30)
|
||
|
|
|
||
|
|
-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
|
||
|
|
+#ifndef debug_parse_print_tc
|
||
|
|
+#define debug_parse_print_tc(n) do { \
|
||
|
|
+if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \
|
||
|
|
+if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \
|
||
|
|
+if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
|
||
|
|
+if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
|
||
|
|
+if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
|
||
|
|
+if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \
|
||
|
|
+if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \
|
||
|
|
+if ((n) & TC_IN ) debug_printf_parse(" IN" ); \
|
||
|
|
+if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
|
||
|
|
+if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
|
||
|
|
+if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
|
||
|
|
+if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
|
||
|
|
+if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \
|
||
|
|
+if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \
|
||
|
|
+if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
|
||
|
|
+if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
|
||
|
|
+if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
|
||
|
|
+if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \
|
||
|
|
+if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \
|
||
|
|
+if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \
|
||
|
|
+if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \
|
||
|
|
+if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \
|
||
|
|
+if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \
|
||
|
|
+if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \
|
||
|
|
+if ((n) & TC_END ) debug_printf_parse(" END" ); \
|
||
|
|
+if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \
|
||
|
|
+if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \
|
||
|
|
+if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \
|
||
|
|
+if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \
|
||
|
|
+if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \
|
||
|
|
+if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
+} while (0)
|
||
|
|
+#endif
|
||
|
|
|
||
|
|
/* combined token classes */
|
||
|
|
+#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
|
||
|
|
+
|
||
|
|
#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
|
||
|
|
//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
|
||
|
|
#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
|
||
|
|
| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
| TC_SEQSTART | TC_STRING | TC_NUMBER)
|
||
|
|
+#define TC_LVALUE (TC_VARIABLE | TC_ARRAY)
|
||
|
|
|
||
|
|
#define TC_STATEMNT (TC_STATX | TC_WHILE)
|
||
|
|
#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
|
||
|
|
@@ -284,7 +323,6 @@ typedef struct tsplitter_s {
|
||
|
|
#define OF_CHECKED 0x200000
|
||
|
|
#define OF_REQUIRED 0x400000
|
||
|
|
|
||
|
|
-
|
||
|
|
/* combined operator flags */
|
||
|
|
#define xx 0
|
||
|
|
#define xV OF_RES2
|
||
|
|
@@ -313,10 +351,8 @@ typedef struct tsplitter_s {
|
||
|
|
#define PRIMASK2 0x7E000000
|
||
|
|
|
||
|
|
/* Operation classes */
|
||
|
|
-
|
||
|
|
#define SHIFT_TIL_THIS 0x0600
|
||
|
|
#define RECUR_FROM_THIS 0x1000
|
||
|
|
-
|
||
|
|
enum {
|
||
|
|
OC_DELETE = 0x0100, OC_EXEC = 0x0200, OC_NEWSOURCE = 0x0300,
|
||
|
|
OC_PRINT = 0x0400, OC_PRINTF = 0x0500, OC_WALKINIT = 0x0600,
|
||
|
|
@@ -411,7 +447,9 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
OC_REGEXP,
|
||
|
|
xS|'a', xS|'w', xS|'|',
|
||
|
|
OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
|
||
|
|
- OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M', OC_FIELD|xV|P(5),
|
||
|
|
+#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
|
||
|
|
+#define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
|
||
|
|
+ TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5),
|
||
|
|
OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
|
||
|
|
OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
|
||
|
|
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
|
||
|
|
@@ -1070,6 +1108,10 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
uint32_t tc;
|
||
|
|
const uint32_t *ti;
|
||
|
|
|
||
|
|
+ debug_printf_parse("%s() expected(%x):", __func__, expected);
|
||
|
|
+ debug_parse_print_tc(expected);
|
||
|
|
+ debug_printf_parse("\n");
|
||
|
|
+
|
||
|
|
if (t_rollback) {
|
||
|
|
debug_printf_parse("%s: using rolled-back token\n", __func__);
|
||
|
|
t_rollback = FALSE;
|
||
|
|
@@ -1226,7 +1268,9 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
|
||
|
|
}
|
||
|
|
|
||
|
|
- debug_printf_parse("%s: returning, ltclass:%x t_double:%f\n", __func__, ltclass, t_double);
|
||
|
|
+ debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double);
|
||
|
|
+ debug_parse_print_tc(ltclass);
|
||
|
|
+ debug_printf_parse("\n");
|
||
|
|
return ltclass;
|
||
|
|
#undef concat_inserted
|
||
|
|
#undef save_tclass
|
||
|
|
@@ -1266,7 +1310,7 @@ static node *condition(void)
|
||
|
|
|
||
|
|
/* parse expression terminated by given argument, return ptr
|
||
|
|
* to built subtree. Terminator is eaten by parse_expr */
|
||
|
|
-static node *parse_expr(uint32_t iexp)
|
||
|
|
+static node *parse_expr(uint32_t term_tc)
|
||
|
|
{
|
||
|
|
node sn;
|
||
|
|
node *cn = &sn;
|
||
|
|
@@ -1274,13 +1318,15 @@ static node *parse_expr(uint32_t iexp)
|
||
|
|
uint32_t tc, xtc;
|
||
|
|
var *v;
|
||
|
|
|
||
|
|
- debug_printf_parse("%s(%x)\n", __func__, iexp);
|
||
|
|
+ debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
|
||
|
|
+ debug_parse_print_tc(term_tc);
|
||
|
|
+ debug_printf_parse("\n");
|
||
|
|
|
||
|
|
sn.info = PRIMASK;
|
||
|
|
sn.r.n = sn.a.n = glptr = NULL;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
|
||
|
|
+ xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc;
|
||
|
|
|
||
|
|
- while (!((tc = next_token(xtc)) & iexp)) {
|
||
|
|
+ while (!((tc = next_token(xtc)) & term_tc)) {
|
||
|
|
|
||
|
|
if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
|
||
|
|
/* input redirection (<) attached to glptr node */
|
||
|
|
@@ -1313,25 +1359,28 @@ static node *parse_expr(uint32_t iexp)
|
||
|
|
next_token(TC_GETLINE);
|
||
|
|
/* give maximum priority to this pipe */
|
||
|
|
cn->info &= ~PRIMASK;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
|
||
|
|
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
cn->r.n = vn;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
|
||
|
|
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
}
|
||
|
|
vn->a.n = cn;
|
||
|
|
|
||
|
|
} else {
|
||
|
|
- debug_printf_parse("%s: other\n", __func__);
|
||
|
|
+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
|
||
|
|
/* for operands and prefix-unary operators, attach them
|
||
|
|
* to last node */
|
||
|
|
vn = cn;
|
||
|
|
cn = vn->r.n = new_node(t_info);
|
||
|
|
cn->a.n = vn;
|
||
|
|
+
|
||
|
|
xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
|
||
|
|
+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
|
||
|
|
+ xtc = TC_LVALUE | TC_UOPPRE1;
|
||
|
|
if (tc & (TC_OPERAND | TC_REGEXP)) {
|
||
|
|
debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
|
||
|
|
- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | iexp;
|
||
|
|
+ xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc;
|
||
|
|
/* one should be very careful with switch on tclass -
|
||
|
|
* only simple tclasses should be used! */
|
||
|
|
switch (tc) {
|
||
|
|
@@ -1388,7 +1437,7 @@ static node *parse_expr(uint32_t iexp)
|
||
|
|
case TC_GETLINE:
|
||
|
|
debug_printf_parse("%s: TC_GETLINE\n", __func__);
|
||
|
|
glptr = cn;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
|
||
|
|
+ xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case TC_BUILTIN:
|
||
|
|
@@ -1603,6 +1652,8 @@ static void parse_program(char *p)
|
||
|
|
func *f;
|
||
|
|
var *v;
|
||
|
|
|
||
|
|
+ debug_printf_parse("%s()\n", __func__);
|
||
|
|
+
|
||
|
|
g_pos = p;
|
||
|
|
t_lineno = 1;
|
||
|
|
while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 3d0acb8934f496021a63471ef9e29c87520612a0 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 20 Jun 2021 22:52:29 +0200
|
||
|
|
Subject: [PATCH 04/61] awk: make code clearer, no actual code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 5 +++--
|
||
|
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 2563722f9..5f1d670a4 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -455,7 +455,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
|
||
|
|
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
|
||
|
|
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
|
||
|
|
- OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
|
||
|
|
+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
|
||
|
|
+ TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
|
||
|
|
OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
|
||
|
|
OC_IN|SV|P(49), /* TC_IN */
|
||
|
|
OC_COMMA|SS|P(80),
|
||
|
|
@@ -1328,7 +1329,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
|
||
|
|
while (!((tc = next_token(xtc)) & term_tc)) {
|
||
|
|
|
||
|
|
- if (glptr && (t_info == (OC_COMPARE | VV | P(39) | 2))) {
|
||
|
|
+ if (glptr && (t_info == TI_LESS)) {
|
||
|
|
/* input redirection (<) attached to glptr node */
|
||
|
|
debug_printf_parse("%s: input redir\n", __func__);
|
||
|
|
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 3c18df6595f8efc0229d7afc948b8ef38fb6f1aa Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 25 Jun 2021 19:38:27 +0200
|
||
|
|
Subject: [PATCH 05/61] awk: more efficient -f FILE, document what "some trick
|
||
|
|
in next_token" is
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_main 890 898 +8
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 33 ++++++++++++++++++++++++---------
|
||
|
|
1 file changed, 24 insertions(+), 9 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 5f1d670a4..1b23c17d2 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1217,6 +1217,8 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
if (!isalnum_(*p))
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN); /* no */
|
||
|
|
/* yes */
|
||
|
|
+/* "move name one char back" trick: we need a byte for NUL terminator */
|
||
|
|
+/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */
|
||
|
|
t_string = --p;
|
||
|
|
while (isalnum_(*++p)) {
|
||
|
|
p[-1] = *p;
|
||
|
|
@@ -3345,7 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
llist_t *list_e = NULL;
|
||
|
|
#endif
|
||
|
|
- int i, j;
|
||
|
|
+ int i;
|
||
|
|
var *v;
|
||
|
|
var tv;
|
||
|
|
char **envp;
|
||
|
|
@@ -3417,30 +3419,43 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
bb_show_usage();
|
||
|
|
}
|
||
|
|
while (list_f) {
|
||
|
|
- char *s = NULL;
|
||
|
|
- FILE *from_file;
|
||
|
|
+ int fd;
|
||
|
|
+ char *s;
|
||
|
|
|
||
|
|
g_progname = llist_pop(&list_f);
|
||
|
|
- from_file = xfopen_stdin(g_progname);
|
||
|
|
- /* one byte is reserved for some trick in next_token */
|
||
|
|
- for (i = j = 1; j > 0; i += j) {
|
||
|
|
- s = xrealloc(s, i + 4096);
|
||
|
|
- j = fread(s + i, 1, 4094, from_file);
|
||
|
|
+ fd = xopen_stdin(g_progname);
|
||
|
|
+ /* 1st byte is reserved for "move name one char back" trick in next_token */
|
||
|
|
+ i = 1;
|
||
|
|
+ s = NULL;
|
||
|
|
+ for (;;) {
|
||
|
|
+ int sz;
|
||
|
|
+ s = xrealloc(s, i + 1000);
|
||
|
|
+ sz = safe_read(fd, s + i, 1000);
|
||
|
|
+ if (sz <= 0)
|
||
|
|
+ break;
|
||
|
|
+ i += sz;
|
||
|
|
}
|
||
|
|
+ s = xrealloc(s, i + 1); /* trim unused 999 bytes */
|
||
|
|
s[i] = '\0';
|
||
|
|
- fclose(from_file);
|
||
|
|
+ close(fd);
|
||
|
|
parse_program(s + 1);
|
||
|
|
free(s);
|
||
|
|
}
|
||
|
|
g_progname = "cmd. line";
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
while (list_e) {
|
||
|
|
+ /* NB: "move name one char back" trick in next_token
|
||
|
|
+ * can use argv[i][-1] here.
|
||
|
|
+ */
|
||
|
|
parse_program(llist_pop(&list_e));
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
if (!(opt & (OPT_f | OPT_e))) {
|
||
|
|
if (!*argv)
|
||
|
|
bb_show_usage();
|
||
|
|
+ /* NB: "move name one char back" trick in next_token
|
||
|
|
+ * can use argv[i][-1] here.
|
||
|
|
+ */
|
||
|
|
parse_program(*argv++);
|
||
|
|
}
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From f8243879801f8d9d9fffbde592aee4264aa30d71 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 25 Jun 2021 19:41:05 +0200
|
||
|
|
Subject: [PATCH 06/61] awk: move locals deeper into scopes where they are
|
||
|
|
used, no logic changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 62 ++++++++++++++++++++++++++-------------------------
|
||
|
|
1 file changed, 32 insertions(+), 30 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 1b23c17d2..86076d7b6 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -3254,20 +3254,19 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
static int awk_exit(int r)
|
||
|
|
{
|
||
|
|
- var tv;
|
||
|
|
unsigned i;
|
||
|
|
- hash_item *hi;
|
||
|
|
-
|
||
|
|
- zero_out_var(&tv);
|
||
|
|
|
||
|
|
if (!exiting) {
|
||
|
|
+ var tv;
|
||
|
|
exiting = TRUE;
|
||
|
|
nextrec = FALSE;
|
||
|
|
+ zero_out_var(&tv);
|
||
|
|
evaluate(endseq.first, &tv);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* waiting for children */
|
||
|
|
for (i = 0; i < fdhash->csize; i++) {
|
||
|
|
+ hash_item *hi;
|
||
|
|
hi = fdhash->items[i];
|
||
|
|
while (hi) {
|
||
|
|
if (hi->data.rs.F && hi->data.rs.is_pipe)
|
||
|
|
@@ -3348,11 +3347,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
llist_t *list_e = NULL;
|
||
|
|
#endif
|
||
|
|
int i;
|
||
|
|
- var *v;
|
||
|
|
var tv;
|
||
|
|
- char **envp;
|
||
|
|
- char *vnames = (char *)vNames; /* cheat */
|
||
|
|
- char *vvalues = (char *)vValues;
|
||
|
|
|
||
|
|
INIT_G();
|
||
|
|
|
||
|
|
@@ -3361,8 +3356,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
if (ENABLE_LOCALE_SUPPORT)
|
||
|
|
setlocale(LC_NUMERIC, "C");
|
||
|
|
|
||
|
|
- zero_out_var(&tv);
|
||
|
|
-
|
||
|
|
/* allocate global buffer */
|
||
|
|
g_buf = xmalloc(MAXVARFMT + 1);
|
||
|
|
|
||
|
|
@@ -3372,16 +3365,21 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
fnhash = hash_init();
|
||
|
|
|
||
|
|
/* initialize variables */
|
||
|
|
- for (i = 0; *vnames; i++) {
|
||
|
|
- intvar[i] = v = newvar(nextword(&vnames));
|
||
|
|
- if (*vvalues != '\377')
|
||
|
|
- setvar_s(v, nextword(&vvalues));
|
||
|
|
- else
|
||
|
|
- setvar_i(v, 0);
|
||
|
|
-
|
||
|
|
- if (*vnames == '*') {
|
||
|
|
- v->type |= VF_SPECIAL;
|
||
|
|
- vnames++;
|
||
|
|
+ {
|
||
|
|
+ char *vnames = (char *)vNames; /* cheat */
|
||
|
|
+ char *vvalues = (char *)vValues;
|
||
|
|
+ for (i = 0; *vnames; i++) {
|
||
|
|
+ var *v;
|
||
|
|
+ intvar[i] = v = newvar(nextword(&vnames));
|
||
|
|
+ if (*vvalues != '\377')
|
||
|
|
+ setvar_s(v, nextword(&vvalues));
|
||
|
|
+ else
|
||
|
|
+ setvar_i(v, 0);
|
||
|
|
+
|
||
|
|
+ if (*vnames == '*') {
|
||
|
|
+ v->type |= VF_SPECIAL;
|
||
|
|
+ vnames++;
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -3393,16 +3391,19 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
newfile("/dev/stderr")->F = stderr;
|
||
|
|
|
||
|
|
/* Huh, people report that sometimes environ is NULL. Oh well. */
|
||
|
|
- if (environ) for (envp = environ; *envp; envp++) {
|
||
|
|
- /* environ is writable, thus we don't strdup it needlessly */
|
||
|
|
- char *s = *envp;
|
||
|
|
- char *s1 = strchr(s, '=');
|
||
|
|
- if (s1) {
|
||
|
|
- *s1 = '\0';
|
||
|
|
- /* Both findvar and setvar_u take const char*
|
||
|
|
- * as 2nd arg -> environment is not trashed */
|
||
|
|
- setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
|
||
|
|
- *s1 = '=';
|
||
|
|
+ if (environ) {
|
||
|
|
+ char **envp;
|
||
|
|
+ for (envp = environ; *envp; envp++) {
|
||
|
|
+ /* environ is writable, thus we don't strdup it needlessly */
|
||
|
|
+ char *s = *envp;
|
||
|
|
+ char *s1 = strchr(s, '=');
|
||
|
|
+ if (s1) {
|
||
|
|
+ *s1 = '\0';
|
||
|
|
+ /* Both findvar and setvar_u take const char*
|
||
|
|
+ * as 2nd arg -> environment is not trashed */
|
||
|
|
+ setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
|
||
|
|
+ *s1 = '=';
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
}
|
||
|
|
opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
|
||
|
|
@@ -3466,6 +3467,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
setari_u(intvar[ARGV], ++i, *argv++);
|
||
|
|
setvar_i(intvar[ARGC], i + 1);
|
||
|
|
|
||
|
|
+ zero_out_var(&tv);
|
||
|
|
evaluate(beginseq.first, &tv);
|
||
|
|
if (!mainseq.first && !endseq.first)
|
||
|
|
awk_exit(EXIT_SUCCESS);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From b52a50128d64e1f601e17507ffc118c180ef7b3d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 01:03:42 +0200
|
||
|
|
Subject: [PATCH 07/61] awk: remove redundant check
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
next_token 785 784 -1
|
||
|
|
parse_program 337 328 -9
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-10) Total: -10 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 32 ++++++++++++++++++--------------
|
||
|
|
1 file changed, 18 insertions(+), 14 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 86076d7b6..9826a57c6 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1093,8 +1093,9 @@ static void nvfree(var *v)
|
||
|
|
|
||
|
|
/* ------- awk program text parsing ------- */
|
||
|
|
|
||
|
|
-/* Parse next token pointed by global pos, place results into global ttt.
|
||
|
|
- * If token isn't expected, give away. Return token class
|
||
|
|
+/* Parse next token pointed by global pos, place results into global t_XYZ variables.
|
||
|
|
+ * If token isn't expected, print error message and die.
|
||
|
|
+ * Return token class (also store it in t_tclass).
|
||
|
|
*/
|
||
|
|
static uint32_t next_token(uint32_t expected)
|
||
|
|
{
|
||
|
|
@@ -1248,33 +1249,35 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
goto readnext;
|
||
|
|
|
||
|
|
/* insert concatenation operator when needed */
|
||
|
|
- debug_printf_parse("%s: %x %x %x concat_inserted?\n", __func__,
|
||
|
|
- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP));
|
||
|
|
+ debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
|
||
|
|
+ (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
|
||
|
|
+ !(ltclass == TC_LENGTH && tc == TC_SEQSTART));
|
||
|
|
if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
|
||
|
|
&& !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
|
||
|
|
) {
|
||
|
|
concat_inserted = TRUE;
|
||
|
|
save_tclass = tc;
|
||
|
|
save_info = t_info;
|
||
|
|
- tc = TC_BINOP;
|
||
|
|
+ tc = TC_BINOPX;
|
||
|
|
t_info = OC_CONCAT | SS | P(35);
|
||
|
|
}
|
||
|
|
|
||
|
|
- debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, t_tclass);
|
||
|
|
t_tclass = tc;
|
||
|
|
+ debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
|
||
|
|
}
|
||
|
|
- ltclass = t_tclass;
|
||
|
|
-
|
||
|
|
/* Are we ready for this? */
|
||
|
|
- if (!(ltclass & expected)) {
|
||
|
|
+ if (!(t_tclass & expected)) {
|
||
|
|
syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
|
||
|
|
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
|
||
|
|
}
|
||
|
|
|
||
|
|
- debug_printf_parse("%s: returning, t_double:%f ltclass:", __func__, t_double);
|
||
|
|
- debug_parse_print_tc(ltclass);
|
||
|
|
+ debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
|
||
|
|
+ debug_parse_print_tc(t_tclass);
|
||
|
|
debug_printf_parse("\n");
|
||
|
|
- return ltclass;
|
||
|
|
+
|
||
|
|
+ ltclass = t_tclass;
|
||
|
|
+
|
||
|
|
+ return t_tclass;
|
||
|
|
#undef concat_inserted
|
||
|
|
#undef save_tclass
|
||
|
|
#undef save_info
|
||
|
|
@@ -1700,8 +1703,9 @@ static void parse_program(char *p)
|
||
|
|
/* Arg followed either by end of arg list or 1 comma */
|
||
|
|
if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
|
||
|
|
break;
|
||
|
|
- if (t_tclass != TC_COMMA)
|
||
|
|
- syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
+//Impossible: next_token() above would error out and die
|
||
|
|
+// if (t_tclass != TC_COMMA)
|
||
|
|
+// syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
}
|
||
|
|
seq = &f->body;
|
||
|
|
chain_group();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 96368c3613c1b01c42b7b382d01142a07c919f60 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 01:09:08 +0200
|
||
|
|
Subject: [PATCH 08/61] awk: make ltclass ("last token class") local to
|
||
|
|
next_token()
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
next_token 784 790 +6
|
||
|
|
next_input_file 219 216 -3
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 1/1 up/down: 6/-3) Total: 3 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 24 ++++++++++--------------
|
||
|
|
1 file changed, 10 insertions(+), 14 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 9826a57c6..418bda160 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -556,7 +556,6 @@ struct globals2 {
|
||
|
|
|
||
|
|
uint32_t next_token__save_tclass;
|
||
|
|
uint32_t next_token__save_info;
|
||
|
|
- uint32_t next_token__ltclass;
|
||
|
|
smallint next_token__concat_inserted;
|
||
|
|
|
||
|
|
smallint next_input_file__files_happen;
|
||
|
|
@@ -615,7 +614,7 @@ struct globals2 {
|
||
|
|
#define rsplitter (G.rsplitter )
|
||
|
|
#define INIT_G() do { \
|
||
|
|
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
|
||
|
|
- G.next_token__ltclass = TC_OPTERM; \
|
||
|
|
+ t_tclass = TC_OPTERM; \
|
||
|
|
G.evaluate__seed = 1; \
|
||
|
|
} while (0)
|
||
|
|
|
||
|
|
@@ -1102,13 +1101,13 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
#define concat_inserted (G.next_token__concat_inserted)
|
||
|
|
#define save_tclass (G.next_token__save_tclass)
|
||
|
|
#define save_info (G.next_token__save_info)
|
||
|
|
-/* Initialized to TC_OPTERM: */
|
||
|
|
-#define ltclass (G.next_token__ltclass)
|
||
|
|
|
||
|
|
char *p, *s;
|
||
|
|
const char *tl;
|
||
|
|
- uint32_t tc;
|
||
|
|
const uint32_t *ti;
|
||
|
|
+ uint32_t tc, last_token_class;
|
||
|
|
+
|
||
|
|
+ last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */
|
||
|
|
|
||
|
|
debug_printf_parse("%s() expected(%x):", __func__, expected);
|
||
|
|
debug_parse_print_tc(expected);
|
||
|
|
@@ -1245,15 +1244,15 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
g_pos = p;
|
||
|
|
|
||
|
|
/* skipping newlines in some cases */
|
||
|
|
- if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
|
||
|
|
+ if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE))
|
||
|
|
goto readnext;
|
||
|
|
|
||
|
|
/* insert concatenation operator when needed */
|
||
|
|
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
|
||
|
|
- (ltclass & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
|
||
|
|
- !(ltclass == TC_LENGTH && tc == TC_SEQSTART));
|
||
|
|
- if ((ltclass & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
|
||
|
|
- && !(ltclass == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
|
||
|
|
+ (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
|
||
|
|
+ !(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
|
||
|
|
+ if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
|
||
|
|
+ && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
|
||
|
|
) {
|
||
|
|
concat_inserted = TRUE;
|
||
|
|
save_tclass = tc;
|
||
|
|
@@ -1267,7 +1266,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
}
|
||
|
|
/* Are we ready for this? */
|
||
|
|
if (!(t_tclass & expected)) {
|
||
|
|
- syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
|
||
|
|
+ syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
|
||
|
|
EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -1275,13 +1274,10 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
debug_parse_print_tc(t_tclass);
|
||
|
|
debug_printf_parse("\n");
|
||
|
|
|
||
|
|
- ltclass = t_tclass;
|
||
|
|
-
|
||
|
|
return t_tclass;
|
||
|
|
#undef concat_inserted
|
||
|
|
#undef save_tclass
|
||
|
|
#undef save_info
|
||
|
|
-#undef ltclass
|
||
|
|
}
|
||
|
|
|
||
|
|
static void rollback_token(void)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 8b51ddd054a3454171440035ed7f125483e9697c Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 01:23:37 +0200
|
||
|
|
Subject: [PATCH 09/61] awk: use TS_foo for combined token classes. No code
|
||
|
|
changes
|
||
|
|
|
||
|
|
Confusion with "simple" classes was the cause of a bug fixed by previous commit
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 128 +++++++++++++++++++++++++-------------------------
|
||
|
|
1 file changed, 64 insertions(+), 64 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 418bda160..764a3dd49 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -281,39 +281,39 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
} while (0)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
-/* combined token classes */
|
||
|
|
-#define TC_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
|
||
|
|
+/* combined token classes ("token [class] sets") */
|
||
|
|
+#define TS_UOPPRE (TC_UOPPRE1 | TC_UOPPRE2)
|
||
|
|
|
||
|
|
-#define TC_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
|
||
|
|
-//#define TC_UNARYOP (TC_UOPPRE | TC_UOPPOST)
|
||
|
|
-#define TC_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
|
||
|
|
- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
- | TC_SEQSTART | TC_STRING | TC_NUMBER)
|
||
|
|
-#define TC_LVALUE (TC_VARIABLE | TC_ARRAY)
|
||
|
|
+#define TS_BINOP (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
|
||
|
|
+//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
|
||
|
|
+#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
|
||
|
|
+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
+ | TC_SEQSTART | TC_STRING | TC_NUMBER)
|
||
|
|
|
||
|
|
-#define TC_STATEMNT (TC_STATX | TC_WHILE)
|
||
|
|
-#define TC_OPTERM (TC_SEMICOL | TC_NEWLINE)
|
||
|
|
+#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
|
||
|
|
+#define TS_STATEMNT (TC_STATX | TC_WHILE)
|
||
|
|
+#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE)
|
||
|
|
|
||
|
|
/* word tokens, cannot mean something else if not expected */
|
||
|
|
-#define TC_WORD (TC_IN | TC_STATEMNT | TC_ELSE \
|
||
|
|
- | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
- | TC_FUNCDECL | TC_BEGIN | TC_END)
|
||
|
|
+#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
|
||
|
|
+ | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
+ | TC_FUNCDECL | TC_BEGIN | TC_END)
|
||
|
|
|
||
|
|
/* discard newlines after these */
|
||
|
|
-#define TC_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
|
||
|
|
- | TC_BINOP | TC_OPTERM)
|
||
|
|
+#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
|
||
|
|
+ | TS_BINOP | TS_OPTERM)
|
||
|
|
|
||
|
|
/* what can expression begin with */
|
||
|
|
-#define TC_OPSEQ (TC_OPERAND | TC_UOPPRE | TC_REGEXP)
|
||
|
|
+#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
|
||
|
|
/* what can group begin with */
|
||
|
|
-#define TC_GRPSEQ (TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)
|
||
|
|
+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART)
|
||
|
|
|
||
|
|
-/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
|
||
|
|
+/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
|
||
|
|
/* operator is inserted between them */
|
||
|
|
-#define TC_CONCAT1 (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
|
||
|
|
+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
|
||
|
|
| TC_STRING | TC_NUMBER | TC_UOPPOST \
|
||
|
|
| TC_LENGTH)
|
||
|
|
-#define TC_CONCAT2 (TC_OPERAND | TC_UOPPRE)
|
||
|
|
+#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
|
||
|
|
|
||
|
|
#define OF_RES1 0x010000
|
||
|
|
#define OF_RES2 0x020000
|
||
|
|
@@ -614,7 +614,7 @@ struct globals2 {
|
||
|
|
#define rsplitter (G.rsplitter )
|
||
|
|
#define INIT_G() do { \
|
||
|
|
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
|
||
|
|
- t_tclass = TC_OPTERM; \
|
||
|
|
+ t_tclass = TS_OPTERM; \
|
||
|
|
G.evaluate__seed = 1; \
|
||
|
|
} while (0)
|
||
|
|
|
||
|
|
@@ -1107,7 +1107,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
const uint32_t *ti;
|
||
|
|
uint32_t tc, last_token_class;
|
||
|
|
|
||
|
|
- last_token_class = t_tclass; /* t_tclass is initialized to TC_OPTERM */
|
||
|
|
+ last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */
|
||
|
|
|
||
|
|
debug_printf_parse("%s() expected(%x):", __func__, expected);
|
||
|
|
debug_parse_print_tc(expected);
|
||
|
|
@@ -1198,9 +1198,9 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
* token matches,
|
||
|
|
* and it's not a longer word,
|
||
|
|
*/
|
||
|
|
- if ((tc & (expected | TC_WORD | TC_NEWLINE))
|
||
|
|
+ if ((tc & (expected | TS_WORD | TC_NEWLINE))
|
||
|
|
&& strncmp(p, tl, l) == 0
|
||
|
|
- && !((tc & TC_WORD) && isalnum_(p[l]))
|
||
|
|
+ && !((tc & TS_WORD) && isalnum_(p[l]))
|
||
|
|
) {
|
||
|
|
/* then this is what we are looking for */
|
||
|
|
t_info = *ti;
|
||
|
|
@@ -1244,14 +1244,14 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
g_pos = p;
|
||
|
|
|
||
|
|
/* skipping newlines in some cases */
|
||
|
|
- if ((last_token_class & TC_NOTERM) && (tc & TC_NEWLINE))
|
||
|
|
+ if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
|
||
|
|
goto readnext;
|
||
|
|
|
||
|
|
/* insert concatenation operator when needed */
|
||
|
|
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
|
||
|
|
- (last_token_class & TC_CONCAT1), (tc & TC_CONCAT2), (expected & TC_BINOP),
|
||
|
|
+ (last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
|
||
|
|
!(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
|
||
|
|
- if ((last_token_class & TC_CONCAT1) && (tc & TC_CONCAT2) && (expected & TC_BINOP)
|
||
|
|
+ if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
|
||
|
|
&& !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
|
||
|
|
) {
|
||
|
|
concat_inserted = TRUE;
|
||
|
|
@@ -1317,7 +1317,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
node sn;
|
||
|
|
node *cn = &sn;
|
||
|
|
node *vn, *glptr;
|
||
|
|
- uint32_t tc, xtc;
|
||
|
|
+ uint32_t tc, expected_tc;
|
||
|
|
var *v;
|
||
|
|
|
||
|
|
debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
|
||
|
|
@@ -1326,20 +1326,20 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
|
||
|
|
sn.info = PRIMASK;
|
||
|
|
sn.r.n = sn.a.n = glptr = NULL;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | term_tc;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;
|
||
|
|
|
||
|
|
- while (!((tc = next_token(xtc)) & term_tc)) {
|
||
|
|
+ while (!((tc = next_token(expected_tc)) & term_tc)) {
|
||
|
|
|
||
|
|
if (glptr && (t_info == TI_LESS)) {
|
||
|
|
/* input redirection (<) attached to glptr node */
|
||
|
|
debug_printf_parse("%s: input redir\n", __func__);
|
||
|
|
cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
|
||
|
|
cn->a.n = glptr;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE;
|
||
|
|
glptr = NULL;
|
||
|
|
|
||
|
|
- } else if (tc & (TC_BINOP | TC_UOPPOST)) {
|
||
|
|
- debug_printf_parse("%s: TC_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
|
||
|
|
+ } else if (tc & (TS_BINOP | TC_UOPPOST)) {
|
||
|
|
+ debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
|
||
|
|
/* for binary and postfix-unary operators, jump back over
|
||
|
|
* previous operators with higher priority */
|
||
|
|
vn = cn;
|
||
|
|
@@ -1353,19 +1353,19 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
t_info += P(6);
|
||
|
|
cn = vn->a.n->r.n = new_node(t_info);
|
||
|
|
cn->a.n = vn->a.n;
|
||
|
|
- if (tc & TC_BINOP) {
|
||
|
|
+ if (tc & TS_BINOP) {
|
||
|
|
cn->l.n = vn;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
if ((t_info & OPCLSMASK) == OC_PGETLINE) {
|
||
|
|
/* it's a pipe */
|
||
|
|
next_token(TC_GETLINE);
|
||
|
|
/* give maximum priority to this pipe */
|
||
|
|
cn->info &= ~PRIMASK;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
cn->r.n = vn;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
}
|
||
|
|
vn->a.n = cn;
|
||
|
|
|
||
|
|
@@ -1377,14 +1377,14 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
cn = vn->r.n = new_node(t_info);
|
||
|
|
cn->a.n = vn;
|
||
|
|
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
if (t_info == TI_PREINC || t_info == TI_PREDEC)
|
||
|
|
- xtc = TC_LVALUE | TC_UOPPRE1;
|
||
|
|
- if (tc & (TC_OPERAND | TC_REGEXP)) {
|
||
|
|
- debug_printf_parse("%s: TC_OPERAND | TC_REGEXP\n", __func__);
|
||
|
|
- xtc = TC_UOPPRE | TC_UOPPOST | TC_BINOP | TC_OPERAND | term_tc;
|
||
|
|
+ expected_tc = TS_LVALUE | TC_UOPPRE1;
|
||
|
|
+ if (tc & (TS_OPERAND | TC_REGEXP)) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
|
||
|
|
+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
|
||
|
|
/* one should be very careful with switch on tclass -
|
||
|
|
- * only simple tclasses should be used! */
|
||
|
|
+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
|
||
|
|
switch (tc) {
|
||
|
|
case TC_VARIABLE:
|
||
|
|
case TC_ARRAY:
|
||
|
|
@@ -1412,7 +1412,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
setvar_i(v, t_double);
|
||
|
|
else {
|
||
|
|
setvar_s(v, t_string);
|
||
|
|
- xtc &= ~TC_UOPPOST; /* "str"++ is not allowed */
|
||
|
|
+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
|
||
|
|
@@ -1439,7 +1439,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
case TC_GETLINE:
|
||
|
|
debug_printf_parse("%s: TC_GETLINE\n", __func__);
|
||
|
|
glptr = cn;
|
||
|
|
- xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | term_tc;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
break;
|
||
|
|
|
||
|
|
case TC_BUILTIN:
|
||
|
|
@@ -1450,7 +1450,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
case TC_LENGTH:
|
||
|
|
debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
next_token(TC_SEQSTART /* length(...) */
|
||
|
|
- | TC_OPTERM /* length; (or newline)*/
|
||
|
|
+ | TS_OPTERM /* length; (or newline)*/
|
||
|
|
| TC_GRPTERM /* length } */
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
@@ -1464,7 +1464,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
}
|
||
|
|
- }
|
||
|
|
+ } /* while() */
|
||
|
|
|
||
|
|
debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
|
||
|
|
return sn.r.n;
|
||
|
|
@@ -1497,7 +1497,7 @@ static void chain_expr(uint32_t info)
|
||
|
|
|
||
|
|
n = chain_node(info);
|
||
|
|
|
||
|
|
- n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
|
||
|
|
+ n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
if ((info & OF_REQUIRED) && !n->l.n)
|
||
|
|
syntax_error(EMSG_TOO_FEW_ARGS);
|
||
|
|
|
||
|
|
@@ -1535,12 +1535,12 @@ static void chain_group(void)
|
||
|
|
node *n, *n2, *n3;
|
||
|
|
|
||
|
|
do {
|
||
|
|
- c = next_token(TC_GRPSEQ);
|
||
|
|
+ c = next_token(TS_GRPSEQ);
|
||
|
|
} while (c & TC_NEWLINE);
|
||
|
|
|
||
|
|
if (c & TC_GRPSTART) {
|
||
|
|
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
|
||
|
|
- while (next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
|
||
|
|
+ while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
|
||
|
|
debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
|
||
|
|
if (t_tclass & TC_NEWLINE)
|
||
|
|
continue;
|
||
|
|
@@ -1548,13 +1548,13 @@ static void chain_group(void)
|
||
|
|
chain_group();
|
||
|
|
}
|
||
|
|
debug_printf_parse("%s: TC_GRPTERM\n", __func__);
|
||
|
|
- } else if (c & (TC_OPSEQ | TC_OPTERM)) {
|
||
|
|
- debug_printf_parse("%s: TC_OPSEQ | TC_OPTERM\n", __func__);
|
||
|
|
+ } else if (c & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
} else {
|
||
|
|
- /* TC_STATEMNT */
|
||
|
|
- debug_printf_parse("%s: TC_STATEMNT(?)\n", __func__);
|
||
|
|
+ /* TS_STATEMNT */
|
||
|
|
+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
|
||
|
|
switch (t_info & OPCLSMASK) {
|
||
|
|
case ST_IF:
|
||
|
|
debug_printf_parse("%s: ST_IF\n", __func__);
|
||
|
|
@@ -1563,7 +1563,7 @@ static void chain_group(void)
|
||
|
|
chain_group();
|
||
|
|
n2 = chain_node(OC_EXEC);
|
||
|
|
n->r.n = seq->last;
|
||
|
|
- if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
|
||
|
|
+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
|
||
|
|
chain_group();
|
||
|
|
n2->a.n = seq->last;
|
||
|
|
} else {
|
||
|
|
@@ -1616,10 +1616,10 @@ static void chain_group(void)
|
||
|
|
case OC_PRINTF:
|
||
|
|
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
|
||
|
|
n = chain_node(t_info);
|
||
|
|
- n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
|
||
|
|
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
|
||
|
|
if (t_tclass & TC_OUTRDR) {
|
||
|
|
n->info |= t_info;
|
||
|
|
- n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
|
||
|
|
+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
}
|
||
|
|
if (t_tclass & TC_GRPTERM)
|
||
|
|
rollback_token();
|
||
|
|
@@ -1658,11 +1658,11 @@ static void parse_program(char *p)
|
||
|
|
|
||
|
|
g_pos = p;
|
||
|
|
t_lineno = 1;
|
||
|
|
- while ((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
|
||
|
|
- TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
|
||
|
|
+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART |
|
||
|
|
+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
|
||
|
|
|
||
|
|
- if (tclass & TC_OPTERM) {
|
||
|
|
- debug_printf_parse("%s: TC_OPTERM\n", __func__);
|
||
|
|
+ if (tclass & TS_OPTERM) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPTERM\n", __func__);
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -1706,11 +1706,11 @@ static void parse_program(char *p)
|
||
|
|
seq = &f->body;
|
||
|
|
chain_group();
|
||
|
|
clear_array(ahash);
|
||
|
|
- } else if (tclass & TC_OPSEQ) {
|
||
|
|
- debug_printf_parse("%s: TC_OPSEQ\n", __func__);
|
||
|
|
+ } else if (tclass & TS_OPSEQ) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
cn = chain_node(OC_TEST);
|
||
|
|
- cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
|
||
|
|
+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART);
|
||
|
|
if (t_tclass & TC_GRPSTART) {
|
||
|
|
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 01cbacb45972e14aa3072bf539c391dd03ed3955 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 01:30:49 +0200
|
||
|
|
Subject: [PATCH 10/61] awk: deindent code block, no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 177 +++++++++++++++++++++++++-------------------------
|
||
|
|
1 file changed, 90 insertions(+), 87 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 764a3dd49..9a3b63df6 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1337,8 +1337,9 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
cn->a.n = glptr;
|
||
|
|
expected_tc = TS_OPERAND | TS_UOPPRE;
|
||
|
|
glptr = NULL;
|
||
|
|
-
|
||
|
|
- } else if (tc & (TS_BINOP | TC_UOPPOST)) {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (tc & (TS_BINOP | TC_UOPPOST)) {
|
||
|
|
debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
|
||
|
|
/* for binary and postfix-unary operators, jump back over
|
||
|
|
* previous operators with higher priority */
|
||
|
|
@@ -1368,101 +1369,103 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
}
|
||
|
|
vn->a.n = cn;
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
|
||
|
|
- } else {
|
||
|
|
- debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
|
||
|
|
- /* for operands and prefix-unary operators, attach them
|
||
|
|
- * to last node */
|
||
|
|
- vn = cn;
|
||
|
|
- cn = vn->r.n = new_node(t_info);
|
||
|
|
- cn->a.n = vn;
|
||
|
|
+ debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
|
||
|
|
+ /* for operands and prefix-unary operators, attach them
|
||
|
|
+ * to last node */
|
||
|
|
+ vn = cn;
|
||
|
|
+ cn = vn->r.n = new_node(t_info);
|
||
|
|
+ cn->a.n = vn;
|
||
|
|
|
||
|
|
- expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
- if (t_info == TI_PREINC || t_info == TI_PREDEC)
|
||
|
|
- expected_tc = TS_LVALUE | TC_UOPPRE1;
|
||
|
|
- if (tc & (TS_OPERAND | TC_REGEXP)) {
|
||
|
|
- debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
|
||
|
|
- expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
|
||
|
|
- /* one should be very careful with switch on tclass -
|
||
|
|
- * only simple tclasses should be used (TC_xyz, not TS_xyz) */
|
||
|
|
- switch (tc) {
|
||
|
|
- case TC_VARIABLE:
|
||
|
|
- case TC_ARRAY:
|
||
|
|
- debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
|
||
|
|
- cn->info = OC_VAR;
|
||
|
|
- v = hash_search(ahash, t_string);
|
||
|
|
- if (v != NULL) {
|
||
|
|
- cn->info = OC_FNARG;
|
||
|
|
- cn->l.aidx = v->x.aidx;
|
||
|
|
- } else {
|
||
|
|
- cn->l.v = newvar(t_string);
|
||
|
|
- }
|
||
|
|
- if (tc & TC_ARRAY) {
|
||
|
|
- cn->info |= xS;
|
||
|
|
- cn->r.n = parse_expr(TC_ARRTERM);
|
||
|
|
- }
|
||
|
|
- break;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
+ if (t_info == TI_PREINC || t_info == TI_PREDEC)
|
||
|
|
+ expected_tc = TS_LVALUE | TC_UOPPRE1;
|
||
|
|
|
||
|
|
- case TC_NUMBER:
|
||
|
|
- case TC_STRING:
|
||
|
|
- debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
|
||
|
|
- cn->info = OC_VAR;
|
||
|
|
- v = cn->l.v = xzalloc(sizeof(var));
|
||
|
|
- if (tc & TC_NUMBER)
|
||
|
|
- setvar_i(v, t_double);
|
||
|
|
- else {
|
||
|
|
- setvar_s(v, t_string);
|
||
|
|
- expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
|
||
|
|
- }
|
||
|
|
- break;
|
||
|
|
+ if (!(tc & (TS_OPERAND | TC_REGEXP)))
|
||
|
|
+ continue;
|
||
|
|
|
||
|
|
- case TC_REGEXP:
|
||
|
|
- debug_printf_parse("%s: TC_REGEXP\n", __func__);
|
||
|
|
- mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
|
||
|
|
- break;
|
||
|
|
+ debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
|
||
|
|
+ expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
|
||
|
|
+ /* one should be very careful with switch on tclass -
|
||
|
|
+ * only simple tclasses should be used (TC_xyz, not TS_xyz) */
|
||
|
|
+ switch (tc) {
|
||
|
|
+ case TC_VARIABLE:
|
||
|
|
+ case TC_ARRAY:
|
||
|
|
+ debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
|
||
|
|
+ cn->info = OC_VAR;
|
||
|
|
+ v = hash_search(ahash, t_string);
|
||
|
|
+ if (v != NULL) {
|
||
|
|
+ cn->info = OC_FNARG;
|
||
|
|
+ cn->l.aidx = v->x.aidx;
|
||
|
|
+ } else {
|
||
|
|
+ cn->l.v = newvar(t_string);
|
||
|
|
+ }
|
||
|
|
+ if (tc & TC_ARRAY) {
|
||
|
|
+ cn->info |= xS;
|
||
|
|
+ cn->r.n = parse_expr(TC_ARRTERM);
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case TC_FUNCTION:
|
||
|
|
- debug_printf_parse("%s: TC_FUNCTION\n", __func__);
|
||
|
|
- cn->info = OC_FUNC;
|
||
|
|
- cn->r.f = newfunc(t_string);
|
||
|
|
- cn->l.n = condition();
|
||
|
|
- break;
|
||
|
|
+ case TC_NUMBER:
|
||
|
|
+ case TC_STRING:
|
||
|
|
+ debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
|
||
|
|
+ cn->info = OC_VAR;
|
||
|
|
+ v = cn->l.v = xzalloc(sizeof(var));
|
||
|
|
+ if (tc & TC_NUMBER)
|
||
|
|
+ setvar_i(v, t_double);
|
||
|
|
+ else {
|
||
|
|
+ setvar_s(v, t_string);
|
||
|
|
+ expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case TC_SEQSTART:
|
||
|
|
- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
|
||
|
|
- cn = vn->r.n = parse_expr(TC_SEQTERM);
|
||
|
|
- if (!cn)
|
||
|
|
- syntax_error("Empty sequence");
|
||
|
|
- cn->a.n = vn;
|
||
|
|
- break;
|
||
|
|
+ case TC_REGEXP:
|
||
|
|
+ debug_printf_parse("%s: TC_REGEXP\n", __func__);
|
||
|
|
+ mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case TC_GETLINE:
|
||
|
|
- debug_printf_parse("%s: TC_GETLINE\n", __func__);
|
||
|
|
- glptr = cn;
|
||
|
|
- expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
- break;
|
||
|
|
+ case TC_FUNCTION:
|
||
|
|
+ debug_printf_parse("%s: TC_FUNCTION\n", __func__);
|
||
|
|
+ cn->info = OC_FUNC;
|
||
|
|
+ cn->r.f = newfunc(t_string);
|
||
|
|
+ cn->l.n = condition();
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case TC_BUILTIN:
|
||
|
|
- debug_printf_parse("%s: TC_BUILTIN\n", __func__);
|
||
|
|
- cn->l.n = condition();
|
||
|
|
- break;
|
||
|
|
+ case TC_SEQSTART:
|
||
|
|
+ debug_printf_parse("%s: TC_SEQSTART\n", __func__);
|
||
|
|
+ cn = vn->r.n = parse_expr(TC_SEQTERM);
|
||
|
|
+ if (!cn)
|
||
|
|
+ syntax_error("Empty sequence");
|
||
|
|
+ cn->a.n = vn;
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case TC_LENGTH:
|
||
|
|
- debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
- next_token(TC_SEQSTART /* length(...) */
|
||
|
|
- | TS_OPTERM /* length; (or newline)*/
|
||
|
|
- | TC_GRPTERM /* length } */
|
||
|
|
- | TC_BINOPX /* length <op> NUM */
|
||
|
|
- | TC_COMMA /* print length, 1 */
|
||
|
|
- );
|
||
|
|
- rollback_token();
|
||
|
|
- if (t_tclass & TC_SEQSTART) {
|
||
|
|
- /* It was a "(" token. Handle just like TC_BUILTIN */
|
||
|
|
- cn->l.n = condition();
|
||
|
|
- }
|
||
|
|
- break;
|
||
|
|
- }
|
||
|
|
+ case TC_GETLINE:
|
||
|
|
+ debug_printf_parse("%s: TC_GETLINE\n", __func__);
|
||
|
|
+ glptr = cn;
|
||
|
|
+ expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ case TC_BUILTIN:
|
||
|
|
+ debug_printf_parse("%s: TC_BUILTIN\n", __func__);
|
||
|
|
+ cn->l.n = condition();
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
+ case TC_LENGTH:
|
||
|
|
+ debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
+ next_token(TC_SEQSTART /* length(...) */
|
||
|
|
+ | TS_OPTERM /* length; (or newline)*/
|
||
|
|
+ | TC_GRPTERM /* length } */
|
||
|
|
+ | TC_BINOPX /* length <op> NUM */
|
||
|
|
+ | TC_COMMA /* print length, 1 */
|
||
|
|
+ );
|
||
|
|
+ rollback_token();
|
||
|
|
+ if (t_tclass & TC_SEQSTART) {
|
||
|
|
+ /* It was a "(" token. Handle just like TC_BUILTIN */
|
||
|
|
+ cn->l.n = condition();
|
||
|
|
}
|
||
|
|
+ break;
|
||
|
|
}
|
||
|
|
} /* while() */
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From acea2fffaa696b855d5189a8a1cd7591fac8891d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 01:50:47 +0200
|
||
|
|
Subject: [PATCH 11/61] awk: rename TC_SEQSTART/END to L/RPAREN, no code
|
||
|
|
changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 94 +++++++++++++++++++++++++--------------------------
|
||
|
|
1 file changed, 47 insertions(+), 47 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 9a3b63df6..d31b97d86 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -207,48 +207,48 @@ typedef struct tsplitter_s {
|
||
|
|
} tsplitter;
|
||
|
|
|
||
|
|
/* simple token classes */
|
||
|
|
-/* Order and hex values are very important!!! See next_token() */
|
||
|
|
-#define TC_SEQSTART (1 << 0) /* ( */
|
||
|
|
-#define TC_SEQTERM (1 << 1) /* ) */
|
||
|
|
+/* order and hex values are very important!!! See next_token() */
|
||
|
|
+#define TC_LPAREN (1 << 0) /* ( */
|
||
|
|
+#define TC_RPAREN (1 << 1) /* ) */
|
||
|
|
#define TC_REGEXP (1 << 2) /* /.../ */
|
||
|
|
#define TC_OUTRDR (1 << 3) /* | > >> */
|
||
|
|
#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
|
||
|
|
#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
|
||
|
|
#define TC_BINOPX (1 << 6) /* two-opnd operator */
|
||
|
|
-#define TC_IN (1 << 7)
|
||
|
|
-#define TC_COMMA (1 << 8)
|
||
|
|
-#define TC_PIPE (1 << 9) /* input redirection pipe */
|
||
|
|
+#define TC_IN (1 << 7) /* 'in' */
|
||
|
|
+#define TC_COMMA (1 << 8) /* , */
|
||
|
|
+#define TC_PIPE (1 << 9) /* input redirection pipe | */
|
||
|
|
#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
|
||
|
|
#define TC_ARRTERM (1 << 11) /* ] */
|
||
|
|
#define TC_GRPSTART (1 << 12) /* { */
|
||
|
|
#define TC_GRPTERM (1 << 13) /* } */
|
||
|
|
-#define TC_SEMICOL (1 << 14)
|
||
|
|
+#define TC_SEMICOL (1 << 14) /* ; */
|
||
|
|
#define TC_NEWLINE (1 << 15)
|
||
|
|
#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
|
||
|
|
-#define TC_WHILE (1 << 17)
|
||
|
|
-#define TC_ELSE (1 << 18)
|
||
|
|
+#define TC_WHILE (1 << 17) /* 'while' */
|
||
|
|
+#define TC_ELSE (1 << 18) /* 'else' */
|
||
|
|
#define TC_BUILTIN (1 << 19)
|
||
|
|
/* This costs ~50 bytes of code.
|
||
|
|
* A separate class to support deprecated "length" form. If we don't need that
|
||
|
|
* (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
|
||
|
|
* can be merged with TC_BUILTIN:
|
||
|
|
*/
|
||
|
|
-#define TC_LENGTH (1 << 20)
|
||
|
|
-#define TC_GETLINE (1 << 21)
|
||
|
|
+#define TC_LENGTH (1 << 20) /* 'length' */
|
||
|
|
+#define TC_GETLINE (1 << 21) /* 'getline' */
|
||
|
|
#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
|
||
|
|
-#define TC_BEGIN (1 << 23)
|
||
|
|
-#define TC_END (1 << 24)
|
||
|
|
+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
|
||
|
|
+#define TC_END (1 << 24) /* 'END' */
|
||
|
|
#define TC_EOF (1 << 25)
|
||
|
|
-#define TC_VARIABLE (1 << 26)
|
||
|
|
-#define TC_ARRAY (1 << 27)
|
||
|
|
-#define TC_FUNCTION (1 << 28)
|
||
|
|
-#define TC_STRING (1 << 29)
|
||
|
|
+#define TC_VARIABLE (1 << 26) /* name */
|
||
|
|
+#define TC_ARRAY (1 << 27) /* name[ */
|
||
|
|
+#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */
|
||
|
|
+#define TC_STRING (1 << 29) /* "..." */
|
||
|
|
#define TC_NUMBER (1 << 30)
|
||
|
|
|
||
|
|
#ifndef debug_parse_print_tc
|
||
|
|
#define debug_parse_print_tc(n) do { \
|
||
|
|
-if ((n) & TC_SEQSTART) debug_printf_parse(" SEQSTART"); \
|
||
|
|
-if ((n) & TC_SEQTERM ) debug_printf_parse(" SEQTERM" ); \
|
||
|
|
+if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \
|
||
|
|
+if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \
|
||
|
|
if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
|
||
|
|
if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
|
||
|
|
if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
|
||
|
|
@@ -288,7 +288,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
//#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
|
||
|
|
#define TS_OPERAND (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
|
||
|
|
| TC_BUILTIN | TC_LENGTH | TC_GETLINE \
|
||
|
|
- | TC_SEQSTART | TC_STRING | TC_NUMBER)
|
||
|
|
+ | TC_LPAREN | TC_STRING | TC_NUMBER)
|
||
|
|
|
||
|
|
#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
|
||
|
|
#define TS_STATEMNT (TC_STATX | TC_WHILE)
|
||
|
|
@@ -310,7 +310,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
|
||
|
|
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
|
||
|
|
/* operator is inserted between them */
|
||
|
|
-#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_SEQTERM \
|
||
|
|
+#define TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
|
||
|
|
| TC_STRING | TC_NUMBER | TC_UOPPOST \
|
||
|
|
| TC_LENGTH)
|
||
|
|
#define TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
|
||
|
|
@@ -394,8 +394,8 @@ enum {
|
||
|
|
#define NTCC '\377'
|
||
|
|
|
||
|
|
static const char tokenlist[] ALIGN1 =
|
||
|
|
- "\1(" NTC /* TC_SEQSTART */
|
||
|
|
- "\1)" NTC /* TC_SEQTERM */
|
||
|
|
+ "\1(" NTC /* TC_LPAREN */
|
||
|
|
+ "\1)" NTC /* TC_RPAREN */
|
||
|
|
"\1/" NTC /* TC_REGEXP */
|
||
|
|
"\2>>" "\1>" "\1|" NTC /* TC_OUTRDR */
|
||
|
|
"\2++" "\2--" NTC /* TC_UOPPOST */
|
||
|
|
@@ -1250,9 +1250,9 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
/* insert concatenation operator when needed */
|
||
|
|
debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
|
||
|
|
(last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
|
||
|
|
- !(last_token_class == TC_LENGTH && tc == TC_SEQSTART));
|
||
|
|
+ !(last_token_class == TC_LENGTH && tc == TC_LPAREN));
|
||
|
|
if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
|
||
|
|
- && !(last_token_class == TC_LENGTH && tc == TC_SEQSTART) /* but not for "length(..." */
|
||
|
|
+ && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
|
||
|
|
) {
|
||
|
|
concat_inserted = TRUE;
|
||
|
|
save_tclass = tc;
|
||
|
|
@@ -1304,10 +1304,10 @@ static void mk_re_node(const char *s, node *n, regex_t *re)
|
||
|
|
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
|
||
|
|
}
|
||
|
|
|
||
|
|
-static node *condition(void)
|
||
|
|
+static node *parse_lrparen_list(void)
|
||
|
|
{
|
||
|
|
- next_token(TC_SEQSTART);
|
||
|
|
- return parse_expr(TC_SEQTERM);
|
||
|
|
+ next_token(TC_LPAREN);
|
||
|
|
+ return parse_expr(TC_RPAREN);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* parse expression terminated by given argument, return ptr
|
||
|
|
@@ -1430,12 +1430,12 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
debug_printf_parse("%s: TC_FUNCTION\n", __func__);
|
||
|
|
cn->info = OC_FUNC;
|
||
|
|
cn->r.f = newfunc(t_string);
|
||
|
|
- cn->l.n = condition();
|
||
|
|
+ cn->l.n = parse_lrparen_list();
|
||
|
|
break;
|
||
|
|
|
||
|
|
- case TC_SEQSTART:
|
||
|
|
- debug_printf_parse("%s: TC_SEQSTART\n", __func__);
|
||
|
|
- cn = vn->r.n = parse_expr(TC_SEQTERM);
|
||
|
|
+ case TC_LPAREN:
|
||
|
|
+ debug_printf_parse("%s: TC_LPAREN\n", __func__);
|
||
|
|
+ cn = vn->r.n = parse_expr(TC_RPAREN);
|
||
|
|
if (!cn)
|
||
|
|
syntax_error("Empty sequence");
|
||
|
|
cn->a.n = vn;
|
||
|
|
@@ -1449,21 +1449,21 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
|
||
|
|
case TC_BUILTIN:
|
||
|
|
debug_printf_parse("%s: TC_BUILTIN\n", __func__);
|
||
|
|
- cn->l.n = condition();
|
||
|
|
+ cn->l.n = parse_lrparen_list();
|
||
|
|
break;
|
||
|
|
|
||
|
|
case TC_LENGTH:
|
||
|
|
debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
- next_token(TC_SEQSTART /* length(...) */
|
||
|
|
+ next_token(TC_LPAREN /* length(...) */
|
||
|
|
| TS_OPTERM /* length; (or newline)*/
|
||
|
|
| TC_GRPTERM /* length } */
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
);
|
||
|
|
rollback_token();
|
||
|
|
- if (t_tclass & TC_SEQSTART) {
|
||
|
|
+ if (t_tclass & TC_LPAREN) {
|
||
|
|
/* It was a "(" token. Handle just like TC_BUILTIN */
|
||
|
|
- cn->l.n = condition();
|
||
|
|
+ cn->l.n = parse_lrparen_list();
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
@@ -1562,7 +1562,7 @@ static void chain_group(void)
|
||
|
|
case ST_IF:
|
||
|
|
debug_printf_parse("%s: ST_IF\n", __func__);
|
||
|
|
n = chain_node(OC_BR | Vx);
|
||
|
|
- n->l.n = condition();
|
||
|
|
+ n->l.n = parse_lrparen_list();
|
||
|
|
chain_group();
|
||
|
|
n2 = chain_node(OC_EXEC);
|
||
|
|
n->r.n = seq->last;
|
||
|
|
@@ -1576,7 +1576,7 @@ static void chain_group(void)
|
||
|
|
|
||
|
|
case ST_WHILE:
|
||
|
|
debug_printf_parse("%s: ST_WHILE\n", __func__);
|
||
|
|
- n2 = condition();
|
||
|
|
+ n2 = parse_lrparen_list();
|
||
|
|
n = chain_loop(NULL);
|
||
|
|
n->l.n = n2;
|
||
|
|
break;
|
||
|
|
@@ -1587,14 +1587,14 @@ static void chain_group(void)
|
||
|
|
n = chain_loop(NULL);
|
||
|
|
n2->a.n = n->a.n;
|
||
|
|
next_token(TC_WHILE);
|
||
|
|
- n->l.n = condition();
|
||
|
|
+ n->l.n = parse_lrparen_list();
|
||
|
|
break;
|
||
|
|
|
||
|
|
case ST_FOR:
|
||
|
|
debug_printf_parse("%s: ST_FOR\n", __func__);
|
||
|
|
- next_token(TC_SEQSTART);
|
||
|
|
- n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
|
||
|
|
- if (t_tclass & TC_SEQTERM) { /* for-in */
|
||
|
|
+ next_token(TC_LPAREN);
|
||
|
|
+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||
|
|
+ if (t_tclass & TC_RPAREN) { /* for-in */
|
||
|
|
if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
n = chain_node(OC_WALKINIT | VV);
|
||
|
|
@@ -1607,7 +1607,7 @@ static void chain_group(void)
|
||
|
|
n = chain_node(OC_EXEC | Vx);
|
||
|
|
n->l.n = n2;
|
||
|
|
n2 = parse_expr(TC_SEMICOL);
|
||
|
|
- n3 = parse_expr(TC_SEQTERM);
|
||
|
|
+ n3 = parse_expr(TC_RPAREN);
|
||
|
|
n = chain_loop(n3);
|
||
|
|
n->l.n = n2;
|
||
|
|
if (!n2)
|
||
|
|
@@ -1686,13 +1686,13 @@ static void parse_program(char *p)
|
||
|
|
f->body.first = NULL;
|
||
|
|
f->nargs = 0;
|
||
|
|
/* Match func arg list: a comma sep list of >= 0 args, and a close paren */
|
||
|
|
- while (next_token(TC_VARIABLE | TC_SEQTERM | TC_COMMA)) {
|
||
|
|
+ while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) {
|
||
|
|
/* Either an empty arg list, or trailing comma from prev iter
|
||
|
|
* must be followed by an arg */
|
||
|
|
- if (f->nargs == 0 && t_tclass == TC_SEQTERM)
|
||
|
|
+ if (f->nargs == 0 && t_tclass == TC_RPAREN)
|
||
|
|
break;
|
||
|
|
|
||
|
|
- /* TC_SEQSTART/TC_COMMA must be followed by TC_VARIABLE */
|
||
|
|
+ /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */
|
||
|
|
if (t_tclass != TC_VARIABLE)
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
|
||
|
|
@@ -1700,7 +1700,7 @@ static void parse_program(char *p)
|
||
|
|
v->x.aidx = f->nargs++;
|
||
|
|
|
||
|
|
/* Arg followed either by end of arg list or 1 comma */
|
||
|
|
- if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
|
||
|
|
+ if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN)
|
||
|
|
break;
|
||
|
|
//Impossible: next_token() above would error out and die
|
||
|
|
// if (t_tclass != TC_COMMA)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 100c649a6d5b8085be19fdcbf02218cf2bcb3cae Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 02:32:32 +0200
|
||
|
|
Subject: [PATCH 12/61] awk: simplify parsing of function declaration
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_program 328 313 -15
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 26 ++++++++++----------------
|
||
|
|
1 file changed, 10 insertions(+), 16 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index d31b97d86..08ff02adb 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -769,7 +769,7 @@ static void hash_remove(xhash *hash, const char *name)
|
||
|
|
|
||
|
|
static char *skip_spaces(char *p)
|
||
|
|
{
|
||
|
|
- while (1) {
|
||
|
|
+ for (;;) {
|
||
|
|
if (*p == '\\' && p[1] == '\n') {
|
||
|
|
p++;
|
||
|
|
t_lineno++;
|
||
|
|
@@ -1685,26 +1685,20 @@ static void parse_program(char *p)
|
||
|
|
f = newfunc(t_string);
|
||
|
|
f->body.first = NULL;
|
||
|
|
f->nargs = 0;
|
||
|
|
- /* Match func arg list: a comma sep list of >= 0 args, and a close paren */
|
||
|
|
- while (next_token(TC_VARIABLE | TC_RPAREN | TC_COMMA)) {
|
||
|
|
- /* Either an empty arg list, or trailing comma from prev iter
|
||
|
|
- * must be followed by an arg */
|
||
|
|
- if (f->nargs == 0 && t_tclass == TC_RPAREN)
|
||
|
|
- break;
|
||
|
|
-
|
||
|
|
- /* TC_LPAREN/TC_COMMA must be followed by TC_VARIABLE */
|
||
|
|
- if (t_tclass != TC_VARIABLE)
|
||
|
|
+ /* func arg list: comma sep list of args, and a close paren */
|
||
|
|
+ for (;;) {
|
||
|
|
+ if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
|
||
|
|
+ if (f->nargs == 0)
|
||
|
|
+ break; /* func() is ok */
|
||
|
|
+ /* func(a,) is not ok */
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
-
|
||
|
|
+ }
|
||
|
|
v = findvar(ahash, t_string);
|
||
|
|
v->x.aidx = f->nargs++;
|
||
|
|
-
|
||
|
|
/* Arg followed either by end of arg list or 1 comma */
|
||
|
|
- if (next_token(TC_COMMA | TC_RPAREN) & TC_RPAREN)
|
||
|
|
+ if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
|
||
|
|
break;
|
||
|
|
-//Impossible: next_token() above would error out and die
|
||
|
|
-// if (t_tclass != TC_COMMA)
|
||
|
|
-// syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
+ /* it was a comma, we ate it */
|
||
|
|
}
|
||
|
|
seq = &f->body;
|
||
|
|
chain_group();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 38cbb39458b554d5bcfb5d326dd235f81e3c9b9d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 02:43:02 +0200
|
||
|
|
Subject: [PATCH 13/61] awk: g_buf[] does not need a separate allocation
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
exec_builtin 1400 1414 +14
|
||
|
|
evaluate 3132 3141 +9
|
||
|
|
getvar_s 121 125 +4
|
||
|
|
awk_main 902 886 -16
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 3/1 up/down: 27/-16) Total: 11 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 8 +++-----
|
||
|
|
1 file changed, 3 insertions(+), 5 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 08ff02adb..7e4f0d142 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -535,7 +535,6 @@ struct globals {
|
||
|
|
var *Fields;
|
||
|
|
nvblock *g_cb;
|
||
|
|
char *g_pos;
|
||
|
|
- char *g_buf;
|
||
|
|
smallint icase;
|
||
|
|
smallint exiting;
|
||
|
|
smallint nextrec;
|
||
|
|
@@ -571,6 +570,8 @@ struct globals2 {
|
||
|
|
|
||
|
|
/* biggest and least used members go last */
|
||
|
|
tsplitter fsplitter, rsplitter;
|
||
|
|
+
|
||
|
|
+ char g_buf[MAXVARFMT + 1];
|
||
|
|
};
|
||
|
|
#define G1 (ptr_to_globals[-1])
|
||
|
|
#define G (*(struct globals2 *)ptr_to_globals)
|
||
|
|
@@ -598,7 +599,6 @@ struct globals2 {
|
||
|
|
#define Fields (G1.Fields )
|
||
|
|
#define g_cb (G1.g_cb )
|
||
|
|
#define g_pos (G1.g_pos )
|
||
|
|
-#define g_buf (G1.g_buf )
|
||
|
|
#define icase (G1.icase )
|
||
|
|
#define exiting (G1.exiting )
|
||
|
|
#define nextrec (G1.nextrec )
|
||
|
|
@@ -612,6 +612,7 @@ struct globals2 {
|
||
|
|
#define intvar (G.intvar )
|
||
|
|
#define fsplitter (G.fsplitter )
|
||
|
|
#define rsplitter (G.rsplitter )
|
||
|
|
+#define g_buf (G.g_buf )
|
||
|
|
#define INIT_G() do { \
|
||
|
|
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
|
||
|
|
t_tclass = TS_OPTERM; \
|
||
|
|
@@ -3353,9 +3354,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
if (ENABLE_LOCALE_SUPPORT)
|
||
|
|
setlocale(LC_NUMERIC, "C");
|
||
|
|
|
||
|
|
- /* allocate global buffer */
|
||
|
|
- g_buf = xmalloc(MAXVARFMT + 1);
|
||
|
|
-
|
||
|
|
vhash = hash_init();
|
||
|
|
ahash = hash_init();
|
||
|
|
fdhash = hash_init();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 743b012550834fe032bdc71257e646e202eac2b2 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 03:02:21 +0200
|
||
|
|
Subject: [PATCH 14/61] awk: when parsing TC_FUNCTION token, eat its opening
|
||
|
|
'('
|
||
|
|
|
||
|
|
...like we do for array references.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_expr 938 948 +10
|
||
|
|
next_token 788 791 +3
|
||
|
|
parse_program 313 310 -3
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/1 up/down: 13/-3) Total: 10 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 30 +++++++++++++++---------------
|
||
|
|
1 file changed, 15 insertions(+), 15 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 7e4f0d142..1a4468a53 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -241,7 +241,7 @@ typedef struct tsplitter_s {
|
||
|
|
#define TC_EOF (1 << 25)
|
||
|
|
#define TC_VARIABLE (1 << 26) /* name */
|
||
|
|
#define TC_ARRAY (1 << 27) /* name[ */
|
||
|
|
-#define TC_FUNCTION (1 << 28) /* name( - but unlike TC_ARRAY, parser does not consume '(' */
|
||
|
|
+#define TC_FUNCTION (1 << 28) /* name( */
|
||
|
|
#define TC_STRING (1 << 29) /* "..." */
|
||
|
|
#define TC_NUMBER (1 << 30)
|
||
|
|
|
||
|
|
@@ -959,6 +959,7 @@ static double getvar_i(var *v)
|
||
|
|
v->number = my_strtod(&s);
|
||
|
|
debug_printf_eval("%f (s:'%s')\n", v->number, s);
|
||
|
|
if (v->type & VF_USER) {
|
||
|
|
+//TODO: skip_spaces() also skips backslash+newline, is it intended here?
|
||
|
|
s = skip_spaces(s);
|
||
|
|
if (*s != '\0')
|
||
|
|
v->type &= ~VF_USER;
|
||
|
|
@@ -1103,7 +1104,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
#define save_tclass (G.next_token__save_tclass)
|
||
|
|
#define save_info (G.next_token__save_info)
|
||
|
|
|
||
|
|
- char *p, *s;
|
||
|
|
+ char *p;
|
||
|
|
const char *tl;
|
||
|
|
const uint32_t *ti;
|
||
|
|
uint32_t tc, last_token_class;
|
||
|
|
@@ -1131,15 +1132,12 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
while (*p != '\n' && *p != '\0')
|
||
|
|
p++;
|
||
|
|
|
||
|
|
- if (*p == '\n')
|
||
|
|
- t_lineno++;
|
||
|
|
-
|
||
|
|
if (*p == '\0') {
|
||
|
|
tc = TC_EOF;
|
||
|
|
debug_printf_parse("%s: token found: TC_EOF\n", __func__);
|
||
|
|
} else if (*p == '\"') {
|
||
|
|
/* it's a string */
|
||
|
|
- t_string = s = ++p;
|
||
|
|
+ char *s = t_string = ++p;
|
||
|
|
while (*p != '\"') {
|
||
|
|
char *pp;
|
||
|
|
if (*p == '\0' || *p == '\n')
|
||
|
|
@@ -1154,7 +1152,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
|
||
|
|
} else if ((expected & TC_REGEXP) && *p == '/') {
|
||
|
|
/* it's regexp */
|
||
|
|
- t_string = s = ++p;
|
||
|
|
+ char *s = t_string = ++p;
|
||
|
|
while (*p != '/') {
|
||
|
|
if (*p == '\0' || *p == '\n')
|
||
|
|
syntax_error(EMSG_UNEXP_EOS);
|
||
|
|
@@ -1185,6 +1183,9 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
tc = TC_NUMBER;
|
||
|
|
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
|
||
|
|
} else {
|
||
|
|
+ if (*p == '\n')
|
||
|
|
+ t_lineno++;
|
||
|
|
+
|
||
|
|
/* search for something known */
|
||
|
|
tl = tokenlist;
|
||
|
|
tc = 0x00000001;
|
||
|
|
@@ -1230,15 +1231,15 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
|
||
|
|
p = skip_spaces(p);
|
||
|
|
if (*p == '(') {
|
||
|
|
+ p++;
|
||
|
|
tc = TC_FUNCTION;
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
|
||
|
|
+ } else if (*p == '[') {
|
||
|
|
+ p++;
|
||
|
|
+ tc = TC_ARRAY;
|
||
|
|
+ debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
|
||
|
|
} else {
|
||
|
|
- if (*p == '[') {
|
||
|
|
- p++;
|
||
|
|
- tc = TC_ARRAY;
|
||
|
|
- debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
|
||
|
|
- } else
|
||
|
|
- debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
|
||
|
|
+ debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
token_found:
|
||
|
|
@@ -1431,7 +1432,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
debug_printf_parse("%s: TC_FUNCTION\n", __func__);
|
||
|
|
cn->info = OC_FUNC;
|
||
|
|
cn->r.f = newfunc(t_string);
|
||
|
|
- cn->l.n = parse_lrparen_list();
|
||
|
|
+ cn->l.n = parse_expr(TC_RPAREN);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case TC_LPAREN:
|
||
|
|
@@ -1682,7 +1683,6 @@ static void parse_program(char *p)
|
||
|
|
} else if (tclass & TC_FUNCDECL) {
|
||
|
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||
|
|
next_token(TC_FUNCTION);
|
||
|
|
- g_pos++;
|
||
|
|
f = newfunc(t_string);
|
||
|
|
f->body.first = NULL;
|
||
|
|
f->nargs = 0;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From f80dfb802b4a0984293d50f80cd41519b109b524 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 03:27:07 +0200
|
||
|
|
Subject: [PATCH 15/61] awk: get rid of "move name one char back" trick in
|
||
|
|
next_token()
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
next_token 791 812 +21
|
||
|
|
awk_main 886 831 -55
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 1/1 up/down: 21/-55) Total: -34 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 54 +++++++++++++++++++++++++--------------------------
|
||
|
|
1 file changed, 27 insertions(+), 27 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 1a4468a53..fb1e5d59b 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -535,6 +535,7 @@ struct globals {
|
||
|
|
var *Fields;
|
||
|
|
nvblock *g_cb;
|
||
|
|
char *g_pos;
|
||
|
|
+ char g_saved_ch;
|
||
|
|
smallint icase;
|
||
|
|
smallint exiting;
|
||
|
|
smallint nextrec;
|
||
|
|
@@ -599,6 +600,7 @@ struct globals2 {
|
||
|
|
#define Fields (G1.Fields )
|
||
|
|
#define g_cb (G1.g_cb )
|
||
|
|
#define g_pos (G1.g_pos )
|
||
|
|
+#define g_saved_ch (G1.g_saved_ch )
|
||
|
|
#define icase (G1.icase )
|
||
|
|
#define exiting (G1.exiting )
|
||
|
|
#define nextrec (G1.nextrec )
|
||
|
|
@@ -1125,6 +1127,10 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
t_info = save_info;
|
||
|
|
} else {
|
||
|
|
p = g_pos;
|
||
|
|
+ if (g_saved_ch != '\0') {
|
||
|
|
+ *p = g_saved_ch;
|
||
|
|
+ g_saved_ch = '\0';
|
||
|
|
+ }
|
||
|
|
readnext:
|
||
|
|
p = skip_spaces(p);
|
||
|
|
g_lineno = t_lineno;
|
||
|
|
@@ -1183,6 +1189,8 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
tc = TC_NUMBER;
|
||
|
|
debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
|
||
|
|
} else {
|
||
|
|
+ char *end_of_name;
|
||
|
|
+
|
||
|
|
if (*p == '\n')
|
||
|
|
t_lineno++;
|
||
|
|
|
||
|
|
@@ -1219,16 +1227,14 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
if (!isalnum_(*p))
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN); /* no */
|
||
|
|
/* yes */
|
||
|
|
-/* "move name one char back" trick: we need a byte for NUL terminator */
|
||
|
|
-/* NB: this results in argv[i][-1] being used (!!!) in e.g. "awk -e 'NAME'" case */
|
||
|
|
- t_string = --p;
|
||
|
|
- while (isalnum_(*++p)) {
|
||
|
|
- p[-1] = *p;
|
||
|
|
- }
|
||
|
|
- p[-1] = '\0';
|
||
|
|
+ t_string = p;
|
||
|
|
+ while (isalnum_(*p))
|
||
|
|
+ p++;
|
||
|
|
+ end_of_name = p;
|
||
|
|
tc = TC_VARIABLE;
|
||
|
|
/* also consume whitespace between functionname and bracket */
|
||
|
|
if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
|
||
|
|
+//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
|
||
|
|
p = skip_spaces(p);
|
||
|
|
if (*p == '(') {
|
||
|
|
p++;
|
||
|
|
@@ -1240,7 +1246,19 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
|
||
|
|
} else {
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
|
||
|
|
+ if (end_of_name == p) {
|
||
|
|
+ /* there is no space for trailing NUL in t_string!
|
||
|
|
+ * We need to save the char we are going to NUL.
|
||
|
|
+ * (we'll use it in future call to next_token())
|
||
|
|
+ */
|
||
|
|
+ g_saved_ch = *end_of_name;
|
||
|
|
+// especially pathological example is V="abc"; V.2 - it's V concatenated to .2
|
||
|
|
+// (it evaluates to "abc0.2"). Because of this case, we can't simply cache
|
||
|
|
+// '.' and analyze it later: we also have to *store it back* in next
|
||
|
|
+// next_token(), in order to give my_strtod() the undamaged ".2" string.
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
+ *end_of_name = '\0'; /* terminate t_string */
|
||
|
|
}
|
||
|
|
token_found:
|
||
|
|
g_pos = p;
|
||
|
|
@@ -3420,38 +3438,20 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
|
||
|
|
g_progname = llist_pop(&list_f);
|
||
|
|
fd = xopen_stdin(g_progname);
|
||
|
|
- /* 1st byte is reserved for "move name one char back" trick in next_token */
|
||
|
|
- i = 1;
|
||
|
|
- s = NULL;
|
||
|
|
- for (;;) {
|
||
|
|
- int sz;
|
||
|
|
- s = xrealloc(s, i + 1000);
|
||
|
|
- sz = safe_read(fd, s + i, 1000);
|
||
|
|
- if (sz <= 0)
|
||
|
|
- break;
|
||
|
|
- i += sz;
|
||
|
|
- }
|
||
|
|
- s = xrealloc(s, i + 1); /* trim unused 999 bytes */
|
||
|
|
- s[i] = '\0';
|
||
|
|
+ s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
|
||
|
|
close(fd);
|
||
|
|
- parse_program(s + 1);
|
||
|
|
+ parse_program(s);
|
||
|
|
free(s);
|
||
|
|
}
|
||
|
|
g_progname = "cmd. line";
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
while (list_e) {
|
||
|
|
- /* NB: "move name one char back" trick in next_token
|
||
|
|
- * can use argv[i][-1] here.
|
||
|
|
- */
|
||
|
|
parse_program(llist_pop(&list_e));
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
if (!(opt & (OPT_f | OPT_e))) {
|
||
|
|
if (!*argv)
|
||
|
|
bb_show_usage();
|
||
|
|
- /* NB: "move name one char back" trick in next_token
|
||
|
|
- * can use argv[i][-1] here.
|
||
|
|
- */
|
||
|
|
parse_program(*argv++);
|
||
|
|
}
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 7fbe3864b057dd6c1ba39d7b5071502c09280767 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 03:44:56 +0200
|
||
|
|
Subject: [PATCH 16/61] awk: code shrink
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_expr 948 945 -3
|
||
|
|
chain_expr 65 62 -3
|
||
|
|
chain_group 655 649 -6
|
||
|
|
parse_program 310 303 -7
|
||
|
|
rollback_token 10 - -10
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/1 grow/shrink: 0/4 up/down: 0/-29) Total: -29 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 18 +++++++++++-------
|
||
|
|
1 file changed, 11 insertions(+), 7 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index fb1e5d59b..3d1c04a32 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1300,7 +1300,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
#undef save_info
|
||
|
|
}
|
||
|
|
|
||
|
|
-static void rollback_token(void)
|
||
|
|
+static ALWAYS_INLINE void rollback_token(void)
|
||
|
|
{
|
||
|
|
t_rollback = TRUE;
|
||
|
|
}
|
||
|
|
@@ -1474,14 +1474,14 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
|
||
|
|
case TC_LENGTH:
|
||
|
|
debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
- next_token(TC_LPAREN /* length(...) */
|
||
|
|
+ tc = next_token(TC_LPAREN /* length(...) */
|
||
|
|
| TS_OPTERM /* length; (or newline)*/
|
||
|
|
| TC_GRPTERM /* length } */
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
);
|
||
|
|
rollback_token();
|
||
|
|
- if (t_tclass & TC_LPAREN) {
|
||
|
|
+ if (tc & TC_LPAREN) {
|
||
|
|
/* It was a "(" token. Handle just like TC_BUILTIN */
|
||
|
|
cn->l.n = parse_lrparen_list();
|
||
|
|
}
|
||
|
|
@@ -1563,19 +1563,23 @@ static void chain_group(void)
|
||
|
|
|
||
|
|
if (c & TC_GRPSTART) {
|
||
|
|
debug_printf_parse("%s: TC_GRPSTART\n", __func__);
|
||
|
|
- while (next_token(TS_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
|
||
|
|
+ while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) {
|
||
|
|
debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
|
||
|
|
- if (t_tclass & TC_NEWLINE)
|
||
|
|
+ if (c & TC_NEWLINE)
|
||
|
|
continue;
|
||
|
|
rollback_token();
|
||
|
|
chain_group();
|
||
|
|
}
|
||
|
|
debug_printf_parse("%s: TC_GRPTERM\n", __func__);
|
||
|
|
- } else if (c & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+ if (c & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
- } else {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+ {
|
||
|
|
/* TS_STATEMNT */
|
||
|
|
debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
|
||
|
|
switch (t_info & OPCLSMASK) {
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 9dba9fae14ec415943d1fda31b6b48d56d5cb0d0 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 03:47:46 +0200
|
||
|
|
Subject: [PATCH 17/61] awk: deindent a block, no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 167 +++++++++++++++++++++++++-------------------------
|
||
|
|
1 file changed, 83 insertions(+), 84 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 3d1c04a32..34bcc1798 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1579,98 +1579,97 @@ static void chain_group(void)
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
- {
|
||
|
|
- /* TS_STATEMNT */
|
||
|
|
- debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
|
||
|
|
- switch (t_info & OPCLSMASK) {
|
||
|
|
- case ST_IF:
|
||
|
|
- debug_printf_parse("%s: ST_IF\n", __func__);
|
||
|
|
- n = chain_node(OC_BR | Vx);
|
||
|
|
- n->l.n = parse_lrparen_list();
|
||
|
|
+
|
||
|
|
+ /* TS_STATEMNT */
|
||
|
|
+ debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
|
||
|
|
+ switch (t_info & OPCLSMASK) {
|
||
|
|
+ case ST_IF:
|
||
|
|
+ debug_printf_parse("%s: ST_IF\n", __func__);
|
||
|
|
+ n = chain_node(OC_BR | Vx);
|
||
|
|
+ n->l.n = parse_lrparen_list();
|
||
|
|
+ chain_group();
|
||
|
|
+ n2 = chain_node(OC_EXEC);
|
||
|
|
+ n->r.n = seq->last;
|
||
|
|
+ if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
|
||
|
|
chain_group();
|
||
|
|
- n2 = chain_node(OC_EXEC);
|
||
|
|
- n->r.n = seq->last;
|
||
|
|
- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
|
||
|
|
- chain_group();
|
||
|
|
- n2->a.n = seq->last;
|
||
|
|
- } else {
|
||
|
|
- rollback_token();
|
||
|
|
- }
|
||
|
|
- break;
|
||
|
|
+ n2->a.n = seq->last;
|
||
|
|
+ } else {
|
||
|
|
+ rollback_token();
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case ST_WHILE:
|
||
|
|
- debug_printf_parse("%s: ST_WHILE\n", __func__);
|
||
|
|
- n2 = parse_lrparen_list();
|
||
|
|
- n = chain_loop(NULL);
|
||
|
|
- n->l.n = n2;
|
||
|
|
- break;
|
||
|
|
+ case ST_WHILE:
|
||
|
|
+ debug_printf_parse("%s: ST_WHILE\n", __func__);
|
||
|
|
+ n2 = parse_lrparen_list();
|
||
|
|
+ n = chain_loop(NULL);
|
||
|
|
+ n->l.n = n2;
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case ST_DO:
|
||
|
|
- debug_printf_parse("%s: ST_DO\n", __func__);
|
||
|
|
- n2 = chain_node(OC_EXEC);
|
||
|
|
- n = chain_loop(NULL);
|
||
|
|
- n2->a.n = n->a.n;
|
||
|
|
- next_token(TC_WHILE);
|
||
|
|
- n->l.n = parse_lrparen_list();
|
||
|
|
- break;
|
||
|
|
+ case ST_DO:
|
||
|
|
+ debug_printf_parse("%s: ST_DO\n", __func__);
|
||
|
|
+ n2 = chain_node(OC_EXEC);
|
||
|
|
+ n = chain_loop(NULL);
|
||
|
|
+ n2->a.n = n->a.n;
|
||
|
|
+ next_token(TC_WHILE);
|
||
|
|
+ n->l.n = parse_lrparen_list();
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case ST_FOR:
|
||
|
|
- debug_printf_parse("%s: ST_FOR\n", __func__);
|
||
|
|
- next_token(TC_LPAREN);
|
||
|
|
- n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||
|
|
- if (t_tclass & TC_RPAREN) { /* for-in */
|
||
|
|
- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
|
||
|
|
- syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
- n = chain_node(OC_WALKINIT | VV);
|
||
|
|
- n->l.n = n2->l.n;
|
||
|
|
- n->r.n = n2->r.n;
|
||
|
|
- n = chain_loop(NULL);
|
||
|
|
- n->info = OC_WALKNEXT | Vx;
|
||
|
|
- n->l.n = n2->l.n;
|
||
|
|
- } else { /* for (;;) */
|
||
|
|
- n = chain_node(OC_EXEC | Vx);
|
||
|
|
- n->l.n = n2;
|
||
|
|
- n2 = parse_expr(TC_SEMICOL);
|
||
|
|
- n3 = parse_expr(TC_RPAREN);
|
||
|
|
- n = chain_loop(n3);
|
||
|
|
- n->l.n = n2;
|
||
|
|
- if (!n2)
|
||
|
|
- n->info = OC_EXEC;
|
||
|
|
- }
|
||
|
|
- break;
|
||
|
|
+ case ST_FOR:
|
||
|
|
+ debug_printf_parse("%s: ST_FOR\n", __func__);
|
||
|
|
+ next_token(TC_LPAREN);
|
||
|
|
+ n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||
|
|
+ if (t_tclass & TC_RPAREN) { /* for-in */
|
||
|
|
+ if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
|
||
|
|
+ syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
+ n = chain_node(OC_WALKINIT | VV);
|
||
|
|
+ n->l.n = n2->l.n;
|
||
|
|
+ n->r.n = n2->r.n;
|
||
|
|
+ n = chain_loop(NULL);
|
||
|
|
+ n->info = OC_WALKNEXT | Vx;
|
||
|
|
+ n->l.n = n2->l.n;
|
||
|
|
+ } else { /* for (;;) */
|
||
|
|
+ n = chain_node(OC_EXEC | Vx);
|
||
|
|
+ n->l.n = n2;
|
||
|
|
+ n2 = parse_expr(TC_SEMICOL);
|
||
|
|
+ n3 = parse_expr(TC_RPAREN);
|
||
|
|
+ n = chain_loop(n3);
|
||
|
|
+ n->l.n = n2;
|
||
|
|
+ if (!n2)
|
||
|
|
+ n->info = OC_EXEC;
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case OC_PRINT:
|
||
|
|
- case OC_PRINTF:
|
||
|
|
- debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
|
||
|
|
- n = chain_node(t_info);
|
||
|
|
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
|
||
|
|
- if (t_tclass & TC_OUTRDR) {
|
||
|
|
- n->info |= t_info;
|
||
|
|
- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
- }
|
||
|
|
- if (t_tclass & TC_GRPTERM)
|
||
|
|
- rollback_token();
|
||
|
|
- break;
|
||
|
|
+ case OC_PRINT:
|
||
|
|
+ case OC_PRINTF:
|
||
|
|
+ debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
|
||
|
|
+ n = chain_node(t_info);
|
||
|
|
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
|
||
|
|
+ if (t_tclass & TC_OUTRDR) {
|
||
|
|
+ n->info |= t_info;
|
||
|
|
+ n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
+ }
|
||
|
|
+ if (t_tclass & TC_GRPTERM)
|
||
|
|
+ rollback_token();
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case OC_BREAK:
|
||
|
|
- debug_printf_parse("%s: OC_BREAK\n", __func__);
|
||
|
|
- n = chain_node(OC_EXEC);
|
||
|
|
- n->a.n = break_ptr;
|
||
|
|
- chain_expr(t_info);
|
||
|
|
- break;
|
||
|
|
+ case OC_BREAK:
|
||
|
|
+ debug_printf_parse("%s: OC_BREAK\n", __func__);
|
||
|
|
+ n = chain_node(OC_EXEC);
|
||
|
|
+ n->a.n = break_ptr;
|
||
|
|
+ chain_expr(t_info);
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- case OC_CONTINUE:
|
||
|
|
- debug_printf_parse("%s: OC_CONTINUE\n", __func__);
|
||
|
|
- n = chain_node(OC_EXEC);
|
||
|
|
- n->a.n = continue_ptr;
|
||
|
|
- chain_expr(t_info);
|
||
|
|
- break;
|
||
|
|
+ case OC_CONTINUE:
|
||
|
|
+ debug_printf_parse("%s: OC_CONTINUE\n", __func__);
|
||
|
|
+ n = chain_node(OC_EXEC);
|
||
|
|
+ n->a.n = continue_ptr;
|
||
|
|
+ chain_expr(t_info);
|
||
|
|
+ break;
|
||
|
|
|
||
|
|
- /* delete, next, nextfile, return, exit */
|
||
|
|
- default:
|
||
|
|
- debug_printf_parse("%s: default\n", __func__);
|
||
|
|
- chain_expr(t_info);
|
||
|
|
- }
|
||
|
|
+ /* delete, next, nextfile, return, exit */
|
||
|
|
+ default:
|
||
|
|
+ debug_printf_parse("%s: default\n", __func__);
|
||
|
|
+ chain_expr(t_info);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From bc9e60546c860c130ed9c312517fbbaf0ad51871 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 12:16:36 +0200
|
||
|
|
Subject: [PATCH 18/61] awk: fix parsing of expressions such as "v (a)"
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
next_token 812 825 +13
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 22 ++++++++++++++++++----
|
||
|
|
testsuite/awk.tests | 11 +++++++++++
|
||
|
|
2 files changed, 29 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 34bcc1798..ce860dc04 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1231,11 +1231,24 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
while (isalnum_(*p))
|
||
|
|
p++;
|
||
|
|
end_of_name = p;
|
||
|
|
- tc = TC_VARIABLE;
|
||
|
|
- /* also consume whitespace between functionname and bracket */
|
||
|
|
- if (!(expected & TC_VARIABLE) || (expected & TC_ARRAY))
|
||
|
|
-//TODO: why if variable can be here (but not array ref), skipping is not allowed? Example where it matters?
|
||
|
|
+
|
||
|
|
+ if (last_token_class == TC_FUNCDECL)
|
||
|
|
+ /* eat space in "function FUNC (...) {...}" declaration */
|
||
|
|
p = skip_spaces(p);
|
||
|
|
+ else if (expected & TC_ARRAY) {
|
||
|
|
+ /* eat space between array name and [ */
|
||
|
|
+ char *s = skip_spaces(p);
|
||
|
|
+ if (*s == '[') /* array ref, not just a name? */
|
||
|
|
+ p = s;
|
||
|
|
+ }
|
||
|
|
+ /* else: do NOT consume whitespace after variable name!
|
||
|
|
+ * gawk allows definition "function FUNC (p) {...}" - note space,
|
||
|
|
+ * but disallows the call "FUNC (p)" because it isn't one -
|
||
|
|
+ * expression "v (a)" should NOT be parsed as TC_FUNCTION:
|
||
|
|
+ * it is a valid concatenation if "v" is a variable,
|
||
|
|
+ * not a function name (and type of name is not known at parse time).
|
||
|
|
+ */
|
||
|
|
+
|
||
|
|
if (*p == '(') {
|
||
|
|
p++;
|
||
|
|
tc = TC_FUNCTION;
|
||
|
|
@@ -1245,6 +1258,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
tc = TC_ARRAY;
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
|
||
|
|
} else {
|
||
|
|
+ tc = TC_VARIABLE;
|
||
|
|
debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
|
||
|
|
if (end_of_name == p) {
|
||
|
|
/* there is no space for trailing NUL in t_string!
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index cf9b722dc..6e35d33dd 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -71,6 +71,17 @@ testing "awk properly handles undefined function" \
|
||
|
|
"L1\n\nawk: cmd. line:5: Call to undefined function\n" \
|
||
|
|
"" ""
|
||
|
|
|
||
|
|
+prg='
|
||
|
|
+BEGIN {
|
||
|
|
+ v=1
|
||
|
|
+ a=2
|
||
|
|
+ print v (a)
|
||
|
|
+}'
|
||
|
|
+testing "'v (a)' is not a function call, it is a concatenation" \
|
||
|
|
+ "awk '$prg' 2>&1" \
|
||
|
|
+ "12\n" \
|
||
|
|
+ "" ""
|
||
|
|
+
|
||
|
|
|
||
|
|
optional DESKTOP
|
||
|
|
testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 08444111ee05f6514bcf6a8c8898ab4e4b827982 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 14:33:04 +0200
|
||
|
|
Subject: [PATCH 19/61] awk: document which hashes are used at what state
|
||
|
|
(parse/execute)
|
||
|
|
|
||
|
|
We can free them after they are no longer needed.
|
||
|
|
(Currently, being a NOEXEC applet is a much larger waste of memory
|
||
|
|
for the case of long-running awk script).
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_main 831 827 -4
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 30 ++++++++++++++++++++----------
|
||
|
|
1 file changed, 20 insertions(+), 10 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index ce860dc04..6142144bb 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -527,7 +527,10 @@ struct globals {
|
||
|
|
chain *seq;
|
||
|
|
node *break_ptr, *continue_ptr;
|
||
|
|
rstream *iF;
|
||
|
|
- xhash *vhash, *ahash, *fdhash, *fnhash;
|
||
|
|
+ xhash *ahash; /* argument names, used only while parsing function bodies */
|
||
|
|
+ xhash *fnhash; /* function names, used only in parsing stage */
|
||
|
|
+ xhash *vhash; /* variables and arrays */
|
||
|
|
+ xhash *fdhash; /* file objects, used only in execution stage */
|
||
|
|
const char *g_progname;
|
||
|
|
int g_lineno;
|
||
|
|
int nfields;
|
||
|
|
@@ -1719,6 +1722,7 @@ static void parse_program(char *p)
|
||
|
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||
|
|
next_token(TC_FUNCTION);
|
||
|
|
f = newfunc(t_string);
|
||
|
|
+//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}'
|
||
|
|
f->body.first = NULL;
|
||
|
|
f->nargs = 0;
|
||
|
|
/* func arg list: comma sep list of args, and a close paren */
|
||
|
|
@@ -3389,12 +3393,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
if (ENABLE_LOCALE_SUPPORT)
|
||
|
|
setlocale(LC_NUMERIC, "C");
|
||
|
|
|
||
|
|
- vhash = hash_init();
|
||
|
|
- ahash = hash_init();
|
||
|
|
- fdhash = hash_init();
|
||
|
|
- fnhash = hash_init();
|
||
|
|
-
|
||
|
|
/* initialize variables */
|
||
|
|
+ vhash = hash_init();
|
||
|
|
{
|
||
|
|
char *vnames = (char *)vNames; /* cheat */
|
||
|
|
char *vvalues = (char *)vValues;
|
||
|
|
@@ -3416,10 +3416,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
handle_special(intvar[FS]);
|
||
|
|
handle_special(intvar[RS]);
|
||
|
|
|
||
|
|
- newfile("/dev/stdin")->F = stdin;
|
||
|
|
- newfile("/dev/stdout")->F = stdout;
|
||
|
|
- newfile("/dev/stderr")->F = stderr;
|
||
|
|
-
|
||
|
|
/* Huh, people report that sometimes environ is NULL. Oh well. */
|
||
|
|
if (environ) {
|
||
|
|
char **envp;
|
||
|
|
@@ -3449,6 +3445,10 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
if (!is_assignment(llist_pop(&list_v)))
|
||
|
|
bb_show_usage();
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+ /* Parse all supplied programs */
|
||
|
|
+ fnhash = hash_init();
|
||
|
|
+ ahash = hash_init();
|
||
|
|
while (list_f) {
|
||
|
|
int fd;
|
||
|
|
char *s;
|
||
|
|
@@ -3471,6 +3471,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
bb_show_usage();
|
||
|
|
parse_program(*argv++);
|
||
|
|
}
|
||
|
|
+ //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies
|
||
|
|
+ //ahash = NULL; // debug
|
||
|
|
+ //free_hash(fnhash) // ~250 bytes, used only for function names
|
||
|
|
+ //fnhash = NULL; // debug
|
||
|
|
+ /* parsing done, on to executing */
|
||
|
|
|
||
|
|
/* fill in ARGV array */
|
||
|
|
setari_u(intvar[ARGV], 0, "awk");
|
||
|
|
@@ -3479,6 +3484,11 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
setari_u(intvar[ARGV], ++i, *argv++);
|
||
|
|
setvar_i(intvar[ARGC], i + 1);
|
||
|
|
|
||
|
|
+ fdhash = hash_init();
|
||
|
|
+ newfile("/dev/stdin")->F = stdin;
|
||
|
|
+ newfile("/dev/stdout")->F = stdout;
|
||
|
|
+ newfile("/dev/stderr")->F = stderr;
|
||
|
|
+
|
||
|
|
zero_out_var(&tv);
|
||
|
|
evaluate(beginseq.first, &tv);
|
||
|
|
if (!mainseq.first && !endseq.first)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From ce151c62189985344d90fc554f8780c7305112f8 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 18:33:25 +0200
|
||
|
|
Subject: [PATCH 20/61] awk: free unused parsing structures after parse is done
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
hash_clear - 90 +90
|
||
|
|
awk_main 827 849 +22
|
||
|
|
clear_array 90 - -90
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 1/1 grow/shrink: 1/0 up/down: 112/-90) Total: 22 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 74 ++++++++++++++++++++++++++++++++-------------------
|
||
|
|
1 file changed, 47 insertions(+), 27 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 6142144bb..4e29b28cf 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -530,7 +530,8 @@ struct globals {
|
||
|
|
xhash *ahash; /* argument names, used only while parsing function bodies */
|
||
|
|
xhash *fnhash; /* function names, used only in parsing stage */
|
||
|
|
xhash *vhash; /* variables and arrays */
|
||
|
|
- xhash *fdhash; /* file objects, used only in execution stage */
|
||
|
|
+ //xhash *fdhash; /* file objects, used only in execution stage */
|
||
|
|
+ //we are reusing ahash as fdhash, via define (see later)
|
||
|
|
const char *g_progname;
|
||
|
|
int g_lineno;
|
||
|
|
int nfields;
|
||
|
|
@@ -592,10 +593,13 @@ struct globals2 {
|
||
|
|
#define break_ptr (G1.break_ptr )
|
||
|
|
#define continue_ptr (G1.continue_ptr)
|
||
|
|
#define iF (G1.iF )
|
||
|
|
-#define vhash (G1.vhash )
|
||
|
|
#define ahash (G1.ahash )
|
||
|
|
-#define fdhash (G1.fdhash )
|
||
|
|
#define fnhash (G1.fnhash )
|
||
|
|
+#define vhash (G1.vhash )
|
||
|
|
+#define fdhash ahash
|
||
|
|
+//^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
|
||
|
|
+// and ends up empty after parsing phase. Thus, we can simply reuse it
|
||
|
|
+// for fdhash in execution stage.
|
||
|
|
#define g_progname (G1.g_progname )
|
||
|
|
#define g_lineno (G1.g_lineno )
|
||
|
|
#define nfields (G1.nfields )
|
||
|
|
@@ -682,6 +686,33 @@ static xhash *hash_init(void)
|
||
|
|
return newhash;
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void hash_clear(xhash *hash)
|
||
|
|
+{
|
||
|
|
+ unsigned i;
|
||
|
|
+ hash_item *hi, *thi;
|
||
|
|
+
|
||
|
|
+ for (i = 0; i < hash->csize; i++) {
|
||
|
|
+ hi = hash->items[i];
|
||
|
|
+ while (hi) {
|
||
|
|
+ thi = hi;
|
||
|
|
+ hi = hi->next;
|
||
|
|
+ free(thi->data.v.string);
|
||
|
|
+ free(thi);
|
||
|
|
+ }
|
||
|
|
+ hash->items[i] = NULL;
|
||
|
|
+ }
|
||
|
|
+ hash->glen = hash->nel = 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+#if 0 //UNUSED
|
||
|
|
+static void hash_free(xhash *hash)
|
||
|
|
+{
|
||
|
|
+ hash_clear(hash);
|
||
|
|
+ free(hash->items);
|
||
|
|
+ free(hash);
|
||
|
|
+}
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
/* find item in hash, return ptr to data, NULL if not found */
|
||
|
|
static void *hash_search(xhash *hash, const char *name)
|
||
|
|
{
|
||
|
|
@@ -869,23 +900,7 @@ static xhash *iamarray(var *v)
|
||
|
|
return a->x.array;
|
||
|
|
}
|
||
|
|
|
||
|
|
-static void clear_array(xhash *array)
|
||
|
|
-{
|
||
|
|
- unsigned i;
|
||
|
|
- hash_item *hi, *thi;
|
||
|
|
-
|
||
|
|
- for (i = 0; i < array->csize; i++) {
|
||
|
|
- hi = array->items[i];
|
||
|
|
- while (hi) {
|
||
|
|
- thi = hi;
|
||
|
|
- hi = hi->next;
|
||
|
|
- free(thi->data.v.string);
|
||
|
|
- free(thi);
|
||
|
|
- }
|
||
|
|
- array->items[i] = NULL;
|
||
|
|
- }
|
||
|
|
- array->glen = array->nel = 0;
|
||
|
|
-}
|
||
|
|
+#define clear_array(array) hash_clear(array)
|
||
|
|
|
||
|
|
/* clear a variable */
|
||
|
|
static var *clrvar(var *v)
|
||
|
|
@@ -1742,7 +1757,7 @@ static void parse_program(char *p)
|
||
|
|
}
|
||
|
|
seq = &f->body;
|
||
|
|
chain_group();
|
||
|
|
- clear_array(ahash);
|
||
|
|
+ hash_clear(ahash);
|
||
|
|
} else if (tclass & TS_OPSEQ) {
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
@@ -3471,11 +3486,16 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
bb_show_usage();
|
||
|
|
parse_program(*argv++);
|
||
|
|
}
|
||
|
|
- //free_hash(ahash) // ~250 bytes, arg names, used only during parse of function bodies
|
||
|
|
- //ahash = NULL; // debug
|
||
|
|
- //free_hash(fnhash) // ~250 bytes, used only for function names
|
||
|
|
- //fnhash = NULL; // debug
|
||
|
|
- /* parsing done, on to executing */
|
||
|
|
+ /* Free unused parse structures */
|
||
|
|
+ //hash_free(fnhash); // ~250 bytes when empty, used only for function names
|
||
|
|
+ //^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
|
||
|
|
+ // (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
|
||
|
|
+ free(fnhash->items);
|
||
|
|
+ free(fnhash);
|
||
|
|
+ fnhash = NULL; // debug
|
||
|
|
+ //hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
|
||
|
|
+
|
||
|
|
+ /* Parsing done, on to executing */
|
||
|
|
|
||
|
|
/* fill in ARGV array */
|
||
|
|
setari_u(intvar[ARGV], 0, "awk");
|
||
|
|
@@ -3484,7 +3504,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
setari_u(intvar[ARGV], ++i, *argv++);
|
||
|
|
setvar_i(intvar[ARGC], i + 1);
|
||
|
|
|
||
|
|
- fdhash = hash_init();
|
||
|
|
+ //fdhash = ahash - done via define
|
||
|
|
newfile("/dev/stdin")->F = stdin;
|
||
|
|
newfile("/dev/stdout")->F = stdout;
|
||
|
|
newfile("/dev/stderr")->F = stderr;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 465eba0f032c96966d2547f116784fb0d8751943 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Tue, 29 Jun 2021 19:07:36 +0200
|
||
|
|
Subject: [PATCH 21/61] awk: assorted optimizations
|
||
|
|
|
||
|
|
hash_find(): do not calculate hash twice. Do not divide - can use
|
||
|
|
cheap multiply-by-8 shift.
|
||
|
|
|
||
|
|
nextword(): do not repeatedly increment in-memory value, do it in register,
|
||
|
|
then store final result.
|
||
|
|
|
||
|
|
hashwalk_init(): do not strlen() twice.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
hash_search3 - 49 +49
|
||
|
|
hash_find 259 281 +22
|
||
|
|
nextword 19 16 -3
|
||
|
|
evaluate 3141 3137 -4
|
||
|
|
hash_search 54 28 -26
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 1/0 grow/shrink: 1/3 up/down: 71/-33) Total: 38 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 26 +++++++++++++++++---------
|
||
|
|
1 file changed, 17 insertions(+), 9 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 4e29b28cf..a4cd3cf93 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -696,6 +696,7 @@ static void hash_clear(xhash *hash)
|
||
|
|
while (hi) {
|
||
|
|
thi = hi;
|
||
|
|
hi = hi->next;
|
||
|
|
+//FIXME: this assumes that it's a hash of *variables*:
|
||
|
|
free(thi->data.v.string);
|
||
|
|
free(thi);
|
||
|
|
}
|
||
|
|
@@ -714,11 +715,11 @@ static void hash_free(xhash *hash)
|
||
|
|
#endif
|
||
|
|
|
||
|
|
/* find item in hash, return ptr to data, NULL if not found */
|
||
|
|
-static void *hash_search(xhash *hash, const char *name)
|
||
|
|
+static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
|
||
|
|
{
|
||
|
|
hash_item *hi;
|
||
|
|
|
||
|
|
- hi = hash->items[hashidx(name) % hash->csize];
|
||
|
|
+ hi = hash->items[idx % hash->csize];
|
||
|
|
while (hi) {
|
||
|
|
if (strcmp(hi->name, name) == 0)
|
||
|
|
return &hi->data;
|
||
|
|
@@ -727,6 +728,11 @@ static void *hash_search(xhash *hash, const char *name)
|
||
|
|
return NULL;
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void *hash_search(xhash *hash, const char *name)
|
||
|
|
+{
|
||
|
|
+ return hash_search3(hash, name, hashidx(name));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* grow hash if it becomes too big */
|
||
|
|
static void hash_rebuild(xhash *hash)
|
||
|
|
{
|
||
|
|
@@ -762,16 +768,17 @@ static void *hash_find(xhash *hash, const char *name)
|
||
|
|
unsigned idx;
|
||
|
|
int l;
|
||
|
|
|
||
|
|
- hi = hash_search(hash, name);
|
||
|
|
+ idx = hashidx(name);
|
||
|
|
+ hi = hash_search3(hash, name, idx);
|
||
|
|
if (!hi) {
|
||
|
|
- if (++hash->nel / hash->csize > 10)
|
||
|
|
+ if (++hash->nel > hash->csize * 8)
|
||
|
|
hash_rebuild(hash);
|
||
|
|
|
||
|
|
l = strlen(name) + 1;
|
||
|
|
hi = xzalloc(sizeof(*hi) + l);
|
||
|
|
strcpy(hi->name, name);
|
||
|
|
|
||
|
|
- idx = hashidx(name) % hash->csize;
|
||
|
|
+ idx = idx % hash->csize;
|
||
|
|
hi->next = hash->items[idx];
|
||
|
|
hash->items[idx] = hi;
|
||
|
|
hash->glen += l;
|
||
|
|
@@ -822,8 +829,10 @@ static char *skip_spaces(char *p)
|
||
|
|
static char *nextword(char **s)
|
||
|
|
{
|
||
|
|
char *p = *s;
|
||
|
|
- while (*(*s)++ != '\0')
|
||
|
|
+ char *q = p;
|
||
|
|
+ while (*q++ != '\0')
|
||
|
|
continue;
|
||
|
|
+ *s = q;
|
||
|
|
return p;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2116,8 +2125,7 @@ static void hashwalk_init(var *v, xhash *array)
|
||
|
|
for (i = 0; i < array->csize; i++) {
|
||
|
|
hi = array->items[i];
|
||
|
|
while (hi) {
|
||
|
|
- strcpy(w->end, hi->name);
|
||
|
|
- nextword(&w->end);
|
||
|
|
+ w->end = stpcpy(w->end, hi->name) + 1;
|
||
|
|
hi = hi->next;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
@@ -3504,7 +3512,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
setari_u(intvar[ARGV], ++i, *argv++);
|
||
|
|
setvar_i(intvar[ARGC], i + 1);
|
||
|
|
|
||
|
|
- //fdhash = ahash - done via define
|
||
|
|
+ //fdhash = ahash; // done via define
|
||
|
|
newfile("/dev/stdin")->F = stdin;
|
||
|
|
newfile("/dev/stdout")->F = stdout;
|
||
|
|
newfile("/dev/stderr")->F = stderr;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 467708ee9c852a4535d554214bb70b916743335a Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 30 Jun 2021 02:12:27 +0200
|
||
|
|
Subject: [PATCH 22/61] awk: remove custom pool allocator for temporary awk
|
||
|
|
variables
|
||
|
|
|
||
|
|
It seems to be designed to reduce overhead of malloc's auxiliary data,
|
||
|
|
by allocating at least 64 variables as a block.
|
||
|
|
With "struct var" being about 20-32 bytes long (32/64 bits),
|
||
|
|
malloc overhead for one temporary indeed is high, ~33% more memory used
|
||
|
|
than needed.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3137 3145 +8
|
||
|
|
modprobe_main 798 803 +5
|
||
|
|
exec_builtin 1414 1419 +5
|
||
|
|
awk_printf 476 481 +5
|
||
|
|
as_regex 132 137 +5
|
||
|
|
EMSG_INTERNAL_ERROR 15 - -15
|
||
|
|
nvfree 169 116 -53
|
||
|
|
nvalloc 145 - -145
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/2 grow/shrink: 5/1 up/down: 28/-213) Total: -185 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 164 +++++++++++++++++++-------------------------------
|
||
|
|
1 file changed, 61 insertions(+), 103 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index a4cd3cf93..35c11ec58 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -93,7 +93,6 @@ enum {
|
||
|
|
};
|
||
|
|
|
||
|
|
#define MAXVARFMT 240
|
||
|
|
-#define MINNVBLOCK 64
|
||
|
|
|
||
|
|
/* variable flags */
|
||
|
|
#define VF_NUMBER 0x0001 /* 1 = primary type is number */
|
||
|
|
@@ -120,8 +119,8 @@ typedef struct walker_list {
|
||
|
|
/* Variable */
|
||
|
|
typedef struct var_s {
|
||
|
|
unsigned type; /* flags */
|
||
|
|
- double number;
|
||
|
|
char *string;
|
||
|
|
+ double number;
|
||
|
|
union {
|
||
|
|
int aidx; /* func arg idx (for compilation stage) */
|
||
|
|
struct xhash_s *array; /* array ptr */
|
||
|
|
@@ -192,15 +191,6 @@ typedef struct node_s {
|
||
|
|
} a;
|
||
|
|
} node;
|
||
|
|
|
||
|
|
-/* Block of temporary variables */
|
||
|
|
-typedef struct nvblock_s {
|
||
|
|
- int size;
|
||
|
|
- var *pos;
|
||
|
|
- struct nvblock_s *prev;
|
||
|
|
- struct nvblock_s *next;
|
||
|
|
- var nv[];
|
||
|
|
-} nvblock;
|
||
|
|
-
|
||
|
|
typedef struct tsplitter_s {
|
||
|
|
node n;
|
||
|
|
regex_t re[2];
|
||
|
|
@@ -537,7 +527,6 @@ struct globals {
|
||
|
|
int nfields;
|
||
|
|
int maxfields; /* used in fsrealloc() only */
|
||
|
|
var *Fields;
|
||
|
|
- nvblock *g_cb;
|
||
|
|
char *g_pos;
|
||
|
|
char g_saved_ch;
|
||
|
|
smallint icase;
|
||
|
|
@@ -605,7 +594,6 @@ struct globals2 {
|
||
|
|
#define nfields (G1.nfields )
|
||
|
|
#define maxfields (G1.maxfields )
|
||
|
|
#define Fields (G1.Fields )
|
||
|
|
-#define g_cb (G1.g_cb )
|
||
|
|
#define g_pos (G1.g_pos )
|
||
|
|
#define g_saved_ch (G1.g_saved_ch )
|
||
|
|
#define icase (G1.icase )
|
||
|
|
@@ -640,7 +628,6 @@ static int awk_exit(int) NORETURN;
|
||
|
|
|
||
|
|
/* ---- error handling ---- */
|
||
|
|
|
||
|
|
-static const char EMSG_INTERNAL_ERROR[] ALIGN1 = "Internal error";
|
||
|
|
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
|
||
|
|
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
|
||
|
|
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
|
||
|
|
@@ -1050,77 +1037,6 @@ static int istrue(var *v)
|
||
|
|
return (v->string && v->string[0]);
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* temporary variables allocator. Last allocated should be first freed */
|
||
|
|
-static var *nvalloc(int n)
|
||
|
|
-{
|
||
|
|
- nvblock *pb = NULL;
|
||
|
|
- var *v, *r;
|
||
|
|
- int size;
|
||
|
|
-
|
||
|
|
- while (g_cb) {
|
||
|
|
- pb = g_cb;
|
||
|
|
- if ((g_cb->pos - g_cb->nv) + n <= g_cb->size)
|
||
|
|
- break;
|
||
|
|
- g_cb = g_cb->next;
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- if (!g_cb) {
|
||
|
|
- size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
|
||
|
|
- g_cb = xzalloc(sizeof(nvblock) + size * sizeof(var));
|
||
|
|
- g_cb->size = size;
|
||
|
|
- g_cb->pos = g_cb->nv;
|
||
|
|
- g_cb->prev = pb;
|
||
|
|
- /*g_cb->next = NULL; - xzalloc did it */
|
||
|
|
- if (pb)
|
||
|
|
- pb->next = g_cb;
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- v = r = g_cb->pos;
|
||
|
|
- g_cb->pos += n;
|
||
|
|
-
|
||
|
|
- while (v < g_cb->pos) {
|
||
|
|
- v->type = 0;
|
||
|
|
- v->string = NULL;
|
||
|
|
- v++;
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- return r;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-static void nvfree(var *v)
|
||
|
|
-{
|
||
|
|
- var *p;
|
||
|
|
-
|
||
|
|
- if (v < g_cb->nv || v >= g_cb->pos)
|
||
|
|
- syntax_error(EMSG_INTERNAL_ERROR);
|
||
|
|
-
|
||
|
|
- for (p = v; p < g_cb->pos; p++) {
|
||
|
|
- if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
|
||
|
|
- clear_array(iamarray(p));
|
||
|
|
- free(p->x.array->items);
|
||
|
|
- free(p->x.array);
|
||
|
|
- }
|
||
|
|
- if (p->type & VF_WALK) {
|
||
|
|
- walker_list *n;
|
||
|
|
- walker_list *w = p->x.walker;
|
||
|
|
- debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
|
||
|
|
- p->x.walker = NULL;
|
||
|
|
- while (w) {
|
||
|
|
- n = w->prev;
|
||
|
|
- debug_printf_walker(" free(%p)\n", w);
|
||
|
|
- free(w);
|
||
|
|
- w = n;
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- clrvar(p);
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- g_cb->pos = v;
|
||
|
|
- while (g_cb->prev && g_cb->pos == g_cb->nv) {
|
||
|
|
- g_cb = g_cb->prev;
|
||
|
|
- }
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
/* ------- awk program text parsing ------- */
|
||
|
|
|
||
|
|
/* Parse next token pointed by global pos, place results into global t_XYZ variables.
|
||
|
|
@@ -1793,6 +1709,41 @@ static void parse_program(char *p)
|
||
|
|
|
||
|
|
/* -------- program execution part -------- */
|
||
|
|
|
||
|
|
+/* temporary variables allocator */
|
||
|
|
+static var *nvalloc(int sz)
|
||
|
|
+{
|
||
|
|
+ return xzalloc(sz * sizeof(var));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void nvfree(var *v, int sz)
|
||
|
|
+{
|
||
|
|
+ var *p = v;
|
||
|
|
+
|
||
|
|
+ while (--sz >= 0) {
|
||
|
|
+ if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
|
||
|
|
+ clear_array(iamarray(p));
|
||
|
|
+ free(p->x.array->items);
|
||
|
|
+ free(p->x.array);
|
||
|
|
+ }
|
||
|
|
+ if (p->type & VF_WALK) {
|
||
|
|
+ walker_list *n;
|
||
|
|
+ walker_list *w = p->x.walker;
|
||
|
|
+ debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
|
||
|
|
+ p->x.walker = NULL;
|
||
|
|
+ while (w) {
|
||
|
|
+ n = w->prev;
|
||
|
|
+ debug_printf_walker(" free(%p)\n", w);
|
||
|
|
+ free(w);
|
||
|
|
+ w = n;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ clrvar(p);
|
||
|
|
+ p++;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ free(v);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
{
|
||
|
|
regex_t *re, *ire;
|
||
|
|
@@ -1814,9 +1765,9 @@ static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
return n;
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* use node as a regular expression. Supplied with node ptr and regex_t
|
||
|
|
+/* Use node as a regular expression. Supplied with node ptr and regex_t
|
||
|
|
* storage space. Return ptr to regex (if result points to preg, it should
|
||
|
|
- * be later regfree'd manually
|
||
|
|
+ * be later regfree'd manually).
|
||
|
|
*/
|
||
|
|
static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
{
|
||
|
|
@@ -1840,7 +1791,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
cflags &= ~REG_EXTENDED;
|
||
|
|
xregcomp(preg, s, cflags);
|
||
|
|
}
|
||
|
|
- nvfree(v);
|
||
|
|
+ nvfree(v, 1);
|
||
|
|
return preg;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2292,6 +2243,8 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
var *v, *arg;
|
||
|
|
|
||
|
|
v = nvalloc(1);
|
||
|
|
+//TODO: above, to avoid allocating a single temporary var, take a pointer
|
||
|
|
+//to a temporary that our caller (evaluate()) already has?
|
||
|
|
fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
|
||
|
|
|
||
|
|
i = 0;
|
||
|
|
@@ -2333,7 +2286,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
}
|
||
|
|
|
||
|
|
free(fmt);
|
||
|
|
- nvfree(v);
|
||
|
|
+ nvfree(v, 1);
|
||
|
|
b = xrealloc(b, i + 1);
|
||
|
|
b[i] = '\0';
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
@@ -2661,14 +2614,14 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
- nvfree(tv);
|
||
|
|
+ nvfree(tv, 4);
|
||
|
|
return res;
|
||
|
|
#undef tspl
|
||
|
|
}
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Evaluate node - the heart of the program. Supplied with subtree
|
||
|
|
- * and place where to store result. returns ptr to result.
|
||
|
|
+ * and place where to store result. Returns ptr to result.
|
||
|
|
*/
|
||
|
|
#define XC(n) ((n) >> 8)
|
||
|
|
|
||
|
|
@@ -2953,33 +2906,38 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_FUNC ): {
|
||
|
|
- var *vbeg, *v;
|
||
|
|
+ var *tv, *sv_fnargs;
|
||
|
|
const char *sv_progname;
|
||
|
|
+ int nargs1, i;
|
||
|
|
+
|
||
|
|
debug_printf_eval("FUNC\n");
|
||
|
|
|
||
|
|
- /* The body might be empty, still has to eval the args */
|
||
|
|
if (!op->r.n->info && !op->r.f->body.first)
|
||
|
|
syntax_error(EMSG_UNDEF_FUNC);
|
||
|
|
|
||
|
|
- vbeg = v = nvalloc(op->r.f->nargs + 1);
|
||
|
|
+ /* The body might be empty, still has to eval the args */
|
||
|
|
+ nargs1 = op->r.f->nargs + 1;
|
||
|
|
+ tv = nvalloc(nargs1);
|
||
|
|
+ i = 0;
|
||
|
|
while (op1) {
|
||
|
|
+//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0
|
||
|
|
var *arg = evaluate(nextarg(&op1), v1);
|
||
|
|
- copyvar(v, arg);
|
||
|
|
- v->type |= VF_CHILD;
|
||
|
|
- v->x.parent = arg;
|
||
|
|
- if (++v - vbeg >= op->r.f->nargs)
|
||
|
|
+ copyvar(&tv[i], arg);
|
||
|
|
+ tv[i].type |= VF_CHILD;
|
||
|
|
+ tv[i].x.parent = arg;
|
||
|
|
+ if (++i >= op->r.f->nargs)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
- v = fnargs;
|
||
|
|
- fnargs = vbeg;
|
||
|
|
+ sv_fnargs = fnargs;
|
||
|
|
sv_progname = g_progname;
|
||
|
|
|
||
|
|
+ fnargs = tv;
|
||
|
|
res = evaluate(op->r.f->body.first, res);
|
||
|
|
+ nvfree(fnargs, nargs1);
|
||
|
|
|
||
|
|
g_progname = sv_progname;
|
||
|
|
- nvfree(fnargs);
|
||
|
|
- fnargs = v;
|
||
|
|
+ fnargs = sv_fnargs;
|
||
|
|
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
@@ -3301,7 +3259,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
} /* while (op) */
|
||
|
|
|
||
|
|
- nvfree(v1);
|
||
|
|
+ nvfree(v1, 2);
|
||
|
|
debug_printf_eval("returning from %s(): %p\n", __func__, res);
|
||
|
|
return res;
|
||
|
|
#undef fnargs
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From c5ddfb36e34c93d63546bc3a7f458b946fa64825 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 30 Jun 2021 12:12:20 +0200
|
||
|
|
Subject: [PATCH 23/61] awk: replace incorrect use of union in undefined
|
||
|
|
function check (no code changes)
|
||
|
|
|
||
|
|
...which reveals that it's buggy: it thinks "func f(){}" is an undefined function!
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 2 +-
|
||
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 35c11ec58..1115085da 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2912,7 +2912,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
debug_printf_eval("FUNC\n");
|
||
|
|
|
||
|
|
- if (!op->r.n->info && !op->r.f->body.first)
|
||
|
|
+ if (op->r.f->nargs == 0 && !op->r.f->body.first)
|
||
|
|
syntax_error(EMSG_UNDEF_FUNC);
|
||
|
|
|
||
|
|
/* The body might be empty, still has to eval the args */
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 1295da1db50adb2b6db53c6d057fdcc952b0bc78 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 30 Jun 2021 12:23:51 +0200
|
||
|
|
Subject: [PATCH 24/61] awk: allow empty functions with no arguments, disallow
|
||
|
|
function redefinitions
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
.rodata 103681 103700 +19
|
||
|
|
parse_program 303 307 +4
|
||
|
|
evaluate 3145 3141 -4
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/1 up/down: 23/-4) Total: 19 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 11 +++++++----
|
||
|
|
testsuite/awk.tests | 10 ++++++++++
|
||
|
|
2 files changed, 17 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 1115085da..c05d5d651 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -139,6 +139,7 @@ typedef struct chain_s {
|
||
|
|
/* Function */
|
||
|
|
typedef struct func_s {
|
||
|
|
unsigned nargs;
|
||
|
|
+ smallint defined;
|
||
|
|
struct chain_s body;
|
||
|
|
} func;
|
||
|
|
|
||
|
|
@@ -1662,9 +1663,11 @@ static void parse_program(char *p)
|
||
|
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||
|
|
next_token(TC_FUNCTION);
|
||
|
|
f = newfunc(t_string);
|
||
|
|
-//FIXME: dup check: functions can't be redefined, this is not ok: awk 'func f(){}; func f(){}'
|
||
|
|
- f->body.first = NULL;
|
||
|
|
- f->nargs = 0;
|
||
|
|
+ if (f->defined)
|
||
|
|
+ syntax_error("Duplicate function");
|
||
|
|
+ f->defined = 1;
|
||
|
|
+ //f->body.first = NULL; - already is
|
||
|
|
+ //f->nargs = 0; - already is
|
||
|
|
/* func arg list: comma sep list of args, and a close paren */
|
||
|
|
for (;;) {
|
||
|
|
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
|
||
|
|
@@ -2912,7 +2915,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
debug_printf_eval("FUNC\n");
|
||
|
|
|
||
|
|
- if (op->r.f->nargs == 0 && !op->r.f->body.first)
|
||
|
|
+ if (!op->r.f->defined)
|
||
|
|
syntax_error(EMSG_UNDEF_FUNC);
|
||
|
|
|
||
|
|
/* The body might be empty, still has to eval the args */
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 6e35d33dd..873cc3680 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -44,6 +44,16 @@ testing "awk handles empty function f(arg){}" \
|
||
|
|
"L1\n\nL2\n\n" \
|
||
|
|
"" ""
|
||
|
|
|
||
|
|
+prg='
|
||
|
|
+function empty_fun(){}
|
||
|
|
+END {empty_fun()
|
||
|
|
+ print "Ok"
|
||
|
|
+}'
|
||
|
|
+testing "awk handles empty function f(){}" \
|
||
|
|
+ "awk '$prg'" \
|
||
|
|
+ "Ok\n" \
|
||
|
|
+ "" ""
|
||
|
|
+
|
||
|
|
prg='
|
||
|
|
function outer_fun() {
|
||
|
|
return 1
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From d88539017ebe731ba507fda8def65969bd14e582 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 30 Jun 2021 12:42:39 +0200
|
||
|
|
Subject: [PATCH 25/61] awk: rewrite "print" logic a bit to make it clearer
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 9 ++++-----
|
||
|
|
1 file changed, 4 insertions(+), 5 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index c05d5d651..0fbca0433 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2792,7 +2792,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
if (!op1) {
|
||
|
|
fputs(getvar_s(intvar[F0]), F);
|
||
|
|
} else {
|
||
|
|
- while (op1) {
|
||
|
|
+ for (;;) {
|
||
|
|
var *v = evaluate(nextarg(&op1), v1);
|
||
|
|
if (v->type & VF_NUMBER) {
|
||
|
|
fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
|
||
|
|
@@ -2801,13 +2801,12 @@ static var *evaluate(node *op, var *res)
|
||
|
|
} else {
|
||
|
|
fputs(getvar_s(v), F);
|
||
|
|
}
|
||
|
|
-
|
||
|
|
- if (op1)
|
||
|
|
- fputs(getvar_s(intvar[OFS]), F);
|
||
|
|
+ if (!op1)
|
||
|
|
+ break;
|
||
|
|
+ fputs(getvar_s(intvar[OFS]), F);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
fputs(getvar_s(intvar[ORS]), F);
|
||
|
|
-
|
||
|
|
} else { /* OC_PRINTF */
|
||
|
|
char *s = awk_printf(op1, &len);
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 04a90dbf88727415f4bcd3d1125d463255557d55 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 30 Jun 2021 12:52:51 +0200
|
||
|
|
Subject: [PATCH 26/61] awk: evaluate all, even superfluous function args
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3128 3135 +7
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 19 ++++++++++++-------
|
||
|
|
testsuite/awk.tests | 8 +++++++-
|
||
|
|
2 files changed, 19 insertions(+), 8 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 0fbca0433..47bbc10a6 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2910,7 +2910,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_FUNC ): {
|
||
|
|
var *tv, *sv_fnargs;
|
||
|
|
const char *sv_progname;
|
||
|
|
- int nargs1, i;
|
||
|
|
+ int nargs, i;
|
||
|
|
|
||
|
|
debug_printf_eval("FUNC\n");
|
||
|
|
|
||
|
|
@@ -2918,17 +2918,22 @@ static var *evaluate(node *op, var *res)
|
||
|
|
syntax_error(EMSG_UNDEF_FUNC);
|
||
|
|
|
||
|
|
/* The body might be empty, still has to eval the args */
|
||
|
|
- nargs1 = op->r.f->nargs + 1;
|
||
|
|
- tv = nvalloc(nargs1);
|
||
|
|
+ nargs = op->r.f->nargs;
|
||
|
|
+ tv = nvalloc(nargs);
|
||
|
|
i = 0;
|
||
|
|
while (op1) {
|
||
|
|
-//TODO: explain why one iteration is done even for the case p->r.f->nargs == 0
|
||
|
|
var *arg = evaluate(nextarg(&op1), v1);
|
||
|
|
+ if (i == nargs) {
|
||
|
|
+ /* call with more arguments than function takes.
|
||
|
|
+ * (gawk warns: "warning: function 'f' called with more arguments than declared").
|
||
|
|
+ * They are still evaluated, but discarded: */
|
||
|
|
+ clrvar(arg);
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
copyvar(&tv[i], arg);
|
||
|
|
tv[i].type |= VF_CHILD;
|
||
|
|
tv[i].x.parent = arg;
|
||
|
|
- if (++i >= op->r.f->nargs)
|
||
|
|
- break;
|
||
|
|
+ i++;
|
||
|
|
}
|
||
|
|
|
||
|
|
sv_fnargs = fnargs;
|
||
|
|
@@ -2936,7 +2941,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
fnargs = tv;
|
||
|
|
res = evaluate(op->r.f->body.first, res);
|
||
|
|
- nvfree(fnargs, nargs1);
|
||
|
|
+ nvfree(fnargs, nargs);
|
||
|
|
|
||
|
|
g_progname = sv_progname;
|
||
|
|
fnargs = sv_fnargs;
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 873cc3680..3c230393f 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -87,11 +87,17 @@ BEGIN {
|
||
|
|
a=2
|
||
|
|
print v (a)
|
||
|
|
}'
|
||
|
|
-testing "'v (a)' is not a function call, it is a concatenation" \
|
||
|
|
+testing "awk 'v (a)' is not a function call, it is a concatenation" \
|
||
|
|
"awk '$prg' 2>&1" \
|
||
|
|
"12\n" \
|
||
|
|
"" ""
|
||
|
|
|
||
|
|
+prg='func f(){print"F"};func g(){print"G"};BEGIN{f(g(),g())}'
|
||
|
|
+testing "awk unused function args are evaluated" \
|
||
|
|
+ "awk '$prg' 2>&1" \
|
||
|
|
+ "G\nG\nF\n" \
|
||
|
|
+ "" ""
|
||
|
|
+
|
||
|
|
|
||
|
|
optional DESKTOP
|
||
|
|
testing "awk hex const 1" "awk '{ print or(0xffffffff,1) }'" "4294967295\n" "" "\n"
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From fd5451c7894cd617a812d095a5d4d3cdc215b218 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Thu, 1 Jul 2021 16:02:16 +0200
|
||
|
|
Subject: [PATCH 27/61] awk: rename temp variables, no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 76 +++++++++++++++++++++++++++++++--------------------
|
||
|
|
1 file changed, 46 insertions(+), 30 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 47bbc10a6..2c2cb74d7 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1775,14 +1775,14 @@ static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
{
|
||
|
|
int cflags;
|
||
|
|
- var *v;
|
||
|
|
+ var *tmpvar;
|
||
|
|
const char *s;
|
||
|
|
|
||
|
|
if ((op->info & OPCLSMASK) == OC_REGEXP) {
|
||
|
|
return icase ? op->r.ire : op->l.re;
|
||
|
|
}
|
||
|
|
- v = nvalloc(1);
|
||
|
|
- s = getvar_s(evaluate(op, v));
|
||
|
|
+ tmpvar = nvalloc(1);
|
||
|
|
+ s = getvar_s(evaluate(op, tmpvar));
|
||
|
|
|
||
|
|
cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
|
||
|
|
/* Testcase where REG_EXTENDED fails (unpaired '{'):
|
||
|
|
@@ -1794,7 +1794,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
cflags &= ~REG_EXTENDED;
|
||
|
|
xregcomp(preg, s, cflags);
|
||
|
|
}
|
||
|
|
- nvfree(v, 1);
|
||
|
|
+ nvfree(tmpvar, 1);
|
||
|
|
return preg;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2243,12 +2243,12 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
const char *s1;
|
||
|
|
int i, j, incr, bsize;
|
||
|
|
char c, c1;
|
||
|
|
- var *v, *arg;
|
||
|
|
+ var *tmpvar, *arg;
|
||
|
|
|
||
|
|
- v = nvalloc(1);
|
||
|
|
+ tmpvar = nvalloc(1);
|
||
|
|
//TODO: above, to avoid allocating a single temporary var, take a pointer
|
||
|
|
//to a temporary that our caller (evaluate()) already has?
|
||
|
|
- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), v)));
|
||
|
|
+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar)));
|
||
|
|
|
||
|
|
i = 0;
|
||
|
|
while (*f) {
|
||
|
|
@@ -2268,7 +2268,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
f++;
|
||
|
|
c1 = *f;
|
||
|
|
*f = '\0';
|
||
|
|
- arg = evaluate(nextarg(&n), v);
|
||
|
|
+ arg = evaluate(nextarg(&n), tmpvar);
|
||
|
|
|
||
|
|
j = i;
|
||
|
|
if (c == 'c' || !c) {
|
||
|
|
@@ -2289,7 +2289,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
}
|
||
|
|
|
||
|
|
free(fmt);
|
||
|
|
- nvfree(v, 1);
|
||
|
|
+ nvfree(tmpvar, 1);
|
||
|
|
b = xrealloc(b, i + 1);
|
||
|
|
b[i] = '\0';
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
@@ -2429,7 +2429,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
{
|
||
|
|
#define tspl (G.exec_builtin__tspl)
|
||
|
|
|
||
|
|
- var *tv;
|
||
|
|
+ var *tmpvars;
|
||
|
|
node *an[4];
|
||
|
|
var *av[4];
|
||
|
|
const char *as[4];
|
||
|
|
@@ -2441,7 +2441,12 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
time_t tt;
|
||
|
|
int i, l, ll, n;
|
||
|
|
|
||
|
|
- tv = nvalloc(4);
|
||
|
|
+ tmpvars = nvalloc(4);
|
||
|
|
+#define TMPVAR0 (tmpvars)
|
||
|
|
+#define TMPVAR1 (tmpvars + 1)
|
||
|
|
+#define TMPVAR2 (tmpvars + 2)
|
||
|
|
+#define TMPVAR3 (tmpvars + 3)
|
||
|
|
+#define TMPVAR(i) (tmpvars + (i))
|
||
|
|
isr = info = op->info;
|
||
|
|
op = op->l.n;
|
||
|
|
|
||
|
|
@@ -2449,7 +2454,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
for (i = 0; i < 4 && op; i++) {
|
||
|
|
an[i] = nextarg(&op);
|
||
|
|
if (isr & 0x09000000)
|
||
|
|
- av[i] = evaluate(an[i], &tv[i]);
|
||
|
|
+ av[i] = evaluate(an[i], TMPVAR(i));
|
||
|
|
if (isr & 0x08000000)
|
||
|
|
as[i] = getvar_s(av[i]);
|
||
|
|
isr >>= 1;
|
||
|
|
@@ -2474,7 +2479,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
|
||
|
|
if (nargs > 2) {
|
||
|
|
spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
|
||
|
|
- an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
|
||
|
|
+ an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
|
||
|
|
} else {
|
||
|
|
spl = &fsplitter.n;
|
||
|
|
}
|
||
|
|
@@ -2617,7 +2622,13 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
- nvfree(tv, 4);
|
||
|
|
+ nvfree(tmpvars, 4);
|
||
|
|
+#undef TMPVAR0
|
||
|
|
+#undef TMPVAR1
|
||
|
|
+#undef TMPVAR2
|
||
|
|
+#undef TMPVAR3
|
||
|
|
+#undef TMPVAR
|
||
|
|
+
|
||
|
|
return res;
|
||
|
|
#undef tspl
|
||
|
|
}
|
||
|
|
@@ -2636,14 +2647,16 @@ static var *evaluate(node *op, var *res)
|
||
|
|
#define seed (G.evaluate__seed)
|
||
|
|
#define sreg (G.evaluate__sreg)
|
||
|
|
|
||
|
|
- var *v1;
|
||
|
|
+ var *tmpvars;
|
||
|
|
+#define TMPVAR0 (tmpvars)
|
||
|
|
+#define TMPVAR1 (tmpvars + 1)
|
||
|
|
|
||
|
|
if (!op)
|
||
|
|
return setvar_s(res, NULL);
|
||
|
|
|
||
|
|
debug_printf_eval("entered %s()\n", __func__);
|
||
|
|
|
||
|
|
- v1 = nvalloc(2);
|
||
|
|
+ tmpvars = nvalloc(2);
|
||
|
|
|
||
|
|
while (op) {
|
||
|
|
struct {
|
||
|
|
@@ -2683,7 +2696,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
if (op1->r.n) { /* array ref? */
|
||
|
|
const char *s;
|
||
|
|
- s = getvar_s(evaluate(op1->r.n, v1));
|
||
|
|
+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
|
||
|
|
hash_remove(iamarray(v), s);
|
||
|
|
} else {
|
||
|
|
clear_array(iamarray(v));
|
||
|
|
@@ -2693,7 +2706,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* execute inevitable things */
|
||
|
|
if (opinfo & OF_RES1)
|
||
|
|
- L.v = evaluate(op1, v1);
|
||
|
|
+ L.v = evaluate(op1, TMPVAR0);
|
||
|
|
if (opinfo & OF_STR1) {
|
||
|
|
L.s = getvar_s(L.v);
|
||
|
|
debug_printf_eval("L.s:'%s'\n", L.s);
|
||
|
|
@@ -2710,7 +2723,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
* (Seen trying to evaluate "$444 $44444")
|
||
|
|
*/
|
||
|
|
if (opinfo & OF_RES2) {
|
||
|
|
- R.v = evaluate(op->r.n, v1+1);
|
||
|
|
+ R.v = evaluate(op->r.n, TMPVAR1);
|
||
|
|
//TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
|
||
|
|
//L.v = NULL;
|
||
|
|
}
|
||
|
|
@@ -2793,7 +2806,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
fputs(getvar_s(intvar[F0]), F);
|
||
|
|
} else {
|
||
|
|
for (;;) {
|
||
|
|
- var *v = evaluate(nextarg(&op1), v1);
|
||
|
|
+ var *v = evaluate(nextarg(&op1), TMPVAR0);
|
||
|
|
if (v->type & VF_NUMBER) {
|
||
|
|
fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
|
||
|
|
getvar_i(v), TRUE);
|
||
|
|
@@ -2892,7 +2905,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
/* if source is a temporary string, jusk relink it to dest */
|
||
|
|
//Disabled: if R.v is numeric but happens to have cached R.v->string,
|
||
|
|
//then L.v ends up being a string, which is wrong
|
||
|
|
-// if (R.v == v1+1 && R.v->string) {
|
||
|
|
+// if (R.v == TMPVAR1 && R.v->string) {
|
||
|
|
// res = setvar_p(L.v, R.v->string);
|
||
|
|
// R.v->string = NULL;
|
||
|
|
// } else {
|
||
|
|
@@ -2908,7 +2921,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_FUNC ): {
|
||
|
|
- var *tv, *sv_fnargs;
|
||
|
|
+ var *argvars, *sv_fnargs;
|
||
|
|
const char *sv_progname;
|
||
|
|
int nargs, i;
|
||
|
|
|
||
|
|
@@ -2919,10 +2932,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* The body might be empty, still has to eval the args */
|
||
|
|
nargs = op->r.f->nargs;
|
||
|
|
- tv = nvalloc(nargs);
|
||
|
|
+ argvars = nvalloc(nargs);
|
||
|
|
i = 0;
|
||
|
|
while (op1) {
|
||
|
|
- var *arg = evaluate(nextarg(&op1), v1);
|
||
|
|
+ var *arg = evaluate(nextarg(&op1), TMPVAR0);
|
||
|
|
if (i == nargs) {
|
||
|
|
/* call with more arguments than function takes.
|
||
|
|
* (gawk warns: "warning: function 'f' called with more arguments than declared").
|
||
|
|
@@ -2930,18 +2943,18 @@ static var *evaluate(node *op, var *res)
|
||
|
|
clrvar(arg);
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
- copyvar(&tv[i], arg);
|
||
|
|
- tv[i].type |= VF_CHILD;
|
||
|
|
- tv[i].x.parent = arg;
|
||
|
|
+ copyvar(&argvars[i], arg);
|
||
|
|
+ argvars[i].type |= VF_CHILD;
|
||
|
|
+ argvars[i].x.parent = arg;
|
||
|
|
i++;
|
||
|
|
}
|
||
|
|
|
||
|
|
sv_fnargs = fnargs;
|
||
|
|
sv_progname = g_progname;
|
||
|
|
|
||
|
|
- fnargs = tv;
|
||
|
|
+ fnargs = argvars;
|
||
|
|
res = evaluate(op->r.f->body.first, res);
|
||
|
|
- nvfree(fnargs, nargs);
|
||
|
|
+ nvfree(argvars, nargs);
|
||
|
|
|
||
|
|
g_progname = sv_progname;
|
||
|
|
fnargs = sv_fnargs;
|
||
|
|
@@ -3266,7 +3279,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
} /* while (op) */
|
||
|
|
|
||
|
|
- nvfree(v1, 2);
|
||
|
|
+ nvfree(tmpvars, 2);
|
||
|
|
+#undef TMPVAR0
|
||
|
|
+#undef TMPVAR1
|
||
|
|
+
|
||
|
|
debug_printf_eval("returning from %s(): %p\n", __func__, res);
|
||
|
|
return res;
|
||
|
|
#undef fnargs
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From b1abb8374ff4bd36d9e850a92ab7a3a7668615d2 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Thu, 1 Jul 2021 17:50:26 +0200
|
||
|
|
Subject: [PATCH 28/61] awk: use static tmpvars instead of nvalloc(1)ed ones
|
||
|
|
|
||
|
|
ptest() was using this idea already.
|
||
|
|
|
||
|
|
As far as I can see, this is safe. Testsuite passes.
|
||
|
|
|
||
|
|
One downside is that a temporary from e.g. printf invocation
|
||
|
|
won't be freed until the next printf call.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_printf 481 468 -13
|
||
|
|
as_regex 137 111 -26
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-39) Total: -39 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 49 ++++++++++++++++++++++++++++++++++---------------
|
||
|
|
1 file changed, 34 insertions(+), 15 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 2c2cb74d7..0be044eef 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -559,7 +559,9 @@ struct globals2 {
|
||
|
|
unsigned evaluate__seed;
|
||
|
|
regex_t evaluate__sreg;
|
||
|
|
|
||
|
|
- var ptest__v;
|
||
|
|
+ var ptest__tmpvar;
|
||
|
|
+ var awk_printf__tmpvar;
|
||
|
|
+ var as_regex__tmpvar;
|
||
|
|
|
||
|
|
tsplitter exec_builtin__tspl;
|
||
|
|
|
||
|
|
@@ -1775,14 +1777,19 @@ static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
{
|
||
|
|
int cflags;
|
||
|
|
- var *tmpvar;
|
||
|
|
const char *s;
|
||
|
|
|
||
|
|
if ((op->info & OPCLSMASK) == OC_REGEXP) {
|
||
|
|
return icase ? op->r.ire : op->l.re;
|
||
|
|
}
|
||
|
|
- tmpvar = nvalloc(1);
|
||
|
|
- s = getvar_s(evaluate(op, tmpvar));
|
||
|
|
+
|
||
|
|
+#define TMPVAR (&G.as_regex__tmpvar)
|
||
|
|
+ //tmpvar = nvalloc(1);
|
||
|
|
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
|
||
|
|
+ // to decrease memory consumption in deeply-recursive awk programs.
|
||
|
|
+ // The rule to work safely is to never call evaluate() while our static
|
||
|
|
+ // TMPVAR's value is still needed.
|
||
|
|
+ s = getvar_s(evaluate(op, TMPVAR));
|
||
|
|
|
||
|
|
cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
|
||
|
|
/* Testcase where REG_EXTENDED fails (unpaired '{'):
|
||
|
|
@@ -1794,7 +1801,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
cflags &= ~REG_EXTENDED;
|
||
|
|
xregcomp(preg, s, cflags);
|
||
|
|
}
|
||
|
|
- nvfree(tmpvar, 1);
|
||
|
|
+ //nvfree(tmpvar, 1);
|
||
|
|
+#undef TMPVAR
|
||
|
|
return preg;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2105,8 +2113,11 @@ static int hashwalk_next(var *v)
|
||
|
|
/* evaluate node, return 1 when result is true, 0 otherwise */
|
||
|
|
static int ptest(node *pattern)
|
||
|
|
{
|
||
|
|
- /* ptest__v is "static": to save stack space? */
|
||
|
|
- return istrue(evaluate(pattern, &G.ptest__v));
|
||
|
|
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
|
||
|
|
+ // to decrease memory consumption in deeply-recursive awk programs.
|
||
|
|
+ // The rule to work safely is to never call evaluate() while our static
|
||
|
|
+ // TMPVAR's value is still needed.
|
||
|
|
+ return istrue(evaluate(pattern, &G.ptest__tmpvar));
|
||
|
|
}
|
||
|
|
|
||
|
|
/* read next record from stream rsm into a variable v */
|
||
|
|
@@ -2243,12 +2254,18 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
const char *s1;
|
||
|
|
int i, j, incr, bsize;
|
||
|
|
char c, c1;
|
||
|
|
- var *tmpvar, *arg;
|
||
|
|
-
|
||
|
|
- tmpvar = nvalloc(1);
|
||
|
|
-//TODO: above, to avoid allocating a single temporary var, take a pointer
|
||
|
|
-//to a temporary that our caller (evaluate()) already has?
|
||
|
|
- fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), tmpvar)));
|
||
|
|
+ var *arg;
|
||
|
|
+
|
||
|
|
+ //tmpvar = nvalloc(1);
|
||
|
|
+#define TMPVAR (&G.awk_printf__tmpvar)
|
||
|
|
+ // We use a single "static" tmpvar (instead of on-stack or malloced one)
|
||
|
|
+ // to decrease memory consumption in deeply-recursive awk programs.
|
||
|
|
+ // The rule to work safely is to never call evaluate() while our static
|
||
|
|
+ // TMPVAR's value is still needed.
|
||
|
|
+ fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
|
||
|
|
+ // ^^^^^^^^^ here we immediately strdup() the value, so the later call
|
||
|
|
+ // to evaluate() potentially recursing into another awk_printf() can't
|
||
|
|
+ // mangle the value.
|
||
|
|
|
||
|
|
i = 0;
|
||
|
|
while (*f) {
|
||
|
|
@@ -2268,7 +2285,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
f++;
|
||
|
|
c1 = *f;
|
||
|
|
*f = '\0';
|
||
|
|
- arg = evaluate(nextarg(&n), tmpvar);
|
||
|
|
+ arg = evaluate(nextarg(&n), TMPVAR);
|
||
|
|
|
||
|
|
j = i;
|
||
|
|
if (c == 'c' || !c) {
|
||
|
|
@@ -2289,7 +2306,9 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
}
|
||
|
|
|
||
|
|
free(fmt);
|
||
|
|
- nvfree(tmpvar, 1);
|
||
|
|
+// nvfree(tmpvar, 1);
|
||
|
|
+#undef TMPVAR
|
||
|
|
+
|
||
|
|
b = xrealloc(b, i + 1);
|
||
|
|
b[i] = '\0';
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From de5007b20bc226273fb50130f2cb0fcaf7abfd3b Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 14:27:40 +0200
|
||
|
|
Subject: [PATCH 29/61] awk: shuffle functions to reduce forward declarations,
|
||
|
|
no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 192 ++++++++++++++++++++++++--------------------------
|
||
|
|
1 file changed, 94 insertions(+), 98 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 0be044eef..6833c2f0d 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -619,18 +619,6 @@ struct globals2 {
|
||
|
|
G.evaluate__seed = 1; \
|
||
|
|
} while (0)
|
||
|
|
|
||
|
|
-
|
||
|
|
-/* function prototypes */
|
||
|
|
-static void handle_special(var *);
|
||
|
|
-static node *parse_expr(uint32_t);
|
||
|
|
-static void chain_group(void);
|
||
|
|
-static var *evaluate(node *, var *);
|
||
|
|
-static rstream *next_input_file(void);
|
||
|
|
-static int fmt_num(char *, int, const char *, double, int);
|
||
|
|
-static int awk_exit(int) NORETURN;
|
||
|
|
-
|
||
|
|
-/* ---- error handling ---- */
|
||
|
|
-
|
||
|
|
static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
|
||
|
|
static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
|
||
|
|
static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
|
||
|
|
@@ -642,10 +630,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
|
||
|
|
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
|
||
|
|
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
|
||
|
|
|
||
|
|
-static void zero_out_var(var *vp)
|
||
|
|
-{
|
||
|
|
- memset(vp, 0, sizeof(*vp));
|
||
|
|
-}
|
||
|
|
+static int awk_exit(int) NORETURN;
|
||
|
|
|
||
|
|
static void syntax_error(const char *message) NORETURN;
|
||
|
|
static void syntax_error(const char *message)
|
||
|
|
@@ -653,6 +638,11 @@ static void syntax_error(const char *message)
|
||
|
|
bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void zero_out_var(var *vp)
|
||
|
|
+{
|
||
|
|
+ memset(vp, 0, sizeof(*vp));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* ---- hash stuff ---- */
|
||
|
|
|
||
|
|
static unsigned hashidx(const char *name)
|
||
|
|
@@ -885,10 +875,29 @@ static double my_strtod(char **pp)
|
||
|
|
|
||
|
|
/* -------- working with variables (set/get/copy/etc) -------- */
|
||
|
|
|
||
|
|
-static xhash *iamarray(var *v)
|
||
|
|
+static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
|
||
|
|
{
|
||
|
|
- var *a = v;
|
||
|
|
+ int r = 0;
|
||
|
|
+ char c;
|
||
|
|
+ const char *s = format;
|
||
|
|
+
|
||
|
|
+ if (int_as_int && n == (long long)n) {
|
||
|
|
+ r = snprintf(b, size, "%lld", (long long)n);
|
||
|
|
+ } else {
|
||
|
|
+ do { c = *s; } while (c && *++s);
|
||
|
|
+ if (strchr("diouxX", c)) {
|
||
|
|
+ r = snprintf(b, size, format, (int)n);
|
||
|
|
+ } else if (strchr("eEfgG", c)) {
|
||
|
|
+ r = snprintf(b, size, format, n);
|
||
|
|
+ } else {
|
||
|
|
+ syntax_error(EMSG_INV_FMT);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return r;
|
||
|
|
+}
|
||
|
|
|
||
|
|
+static xhash *iamarray(var *a)
|
||
|
|
+{
|
||
|
|
while (a->type & VF_CHILD)
|
||
|
|
a = a->x.parent;
|
||
|
|
|
||
|
|
@@ -913,6 +922,8 @@ static var *clrvar(var *v)
|
||
|
|
return v;
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void handle_special(var *);
|
||
|
|
+
|
||
|
|
/* assign string value to variable */
|
||
|
|
static var *setvar_p(var *v, char *value)
|
||
|
|
{
|
||
|
|
@@ -1284,6 +1295,8 @@ static void mk_re_node(const char *s, node *n, regex_t *re)
|
||
|
|
xregcomp(re + 1, s, REG_EXTENDED | REG_ICASE);
|
||
|
|
}
|
||
|
|
|
||
|
|
+static node *parse_expr(uint32_t);
|
||
|
|
+
|
||
|
|
static node *parse_lrparen_list(void)
|
||
|
|
{
|
||
|
|
next_token(TC_LPAREN);
|
||
|
|
@@ -1488,6 +1501,8 @@ static void chain_expr(uint32_t info)
|
||
|
|
rollback_token();
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void chain_group(void);
|
||
|
|
+
|
||
|
|
static node *chain_loop(node *nn)
|
||
|
|
{
|
||
|
|
node *n, *n2, *save_brk, *save_cont;
|
||
|
|
@@ -1770,6 +1785,8 @@ static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
return n;
|
||
|
|
}
|
||
|
|
|
||
|
|
+static var *evaluate(node *, var *);
|
||
|
|
+
|
||
|
|
/* Use node as a regular expression. Supplied with node ptr and regex_t
|
||
|
|
* storage space. Return ptr to regex (if result points to preg, it should
|
||
|
|
* be later regfree'd manually).
|
||
|
|
@@ -2222,27 +2239,6 @@ static int awk_getline(rstream *rsm, var *v)
|
||
|
|
return r;
|
||
|
|
}
|
||
|
|
|
||
|
|
-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
|
||
|
|
-{
|
||
|
|
- int r = 0;
|
||
|
|
- char c;
|
||
|
|
- const char *s = format;
|
||
|
|
-
|
||
|
|
- if (int_as_int && n == (long long)n) {
|
||
|
|
- r = snprintf(b, size, "%lld", (long long)n);
|
||
|
|
- } else {
|
||
|
|
- do { c = *s; } while (c && *++s);
|
||
|
|
- if (strchr("diouxX", c)) {
|
||
|
|
- r = snprintf(b, size, format, (int)n);
|
||
|
|
- } else if (strchr("eEfgG", c)) {
|
||
|
|
- r = snprintf(b, size, format, n);
|
||
|
|
- } else {
|
||
|
|
- syntax_error(EMSG_INV_FMT);
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- return r;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
/* formatted output into an allocated buffer, return ptr to buffer */
|
||
|
|
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
# define awk_printf(a, b) awk_printf(a)
|
||
|
|
@@ -2306,7 +2302,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
}
|
||
|
|
|
||
|
|
free(fmt);
|
||
|
|
-// nvfree(tmpvar, 1);
|
||
|
|
+ //nvfree(tmpvar, 1);
|
||
|
|
#undef TMPVAR
|
||
|
|
|
||
|
|
b = xrealloc(b, i + 1);
|
||
|
|
@@ -2652,6 +2648,64 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
#undef tspl
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* if expr looks like "var=value", perform assignment and return 1,
|
||
|
|
+ * otherwise return 0 */
|
||
|
|
+static int is_assignment(const char *expr)
|
||
|
|
+{
|
||
|
|
+ char *exprc, *val;
|
||
|
|
+
|
||
|
|
+ if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
|
||
|
|
+ return FALSE;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ exprc = xstrdup(expr);
|
||
|
|
+ val = exprc + (val - expr);
|
||
|
|
+ *val++ = '\0';
|
||
|
|
+
|
||
|
|
+ unescape_string_in_place(val);
|
||
|
|
+ setvar_u(newvar(exprc), val);
|
||
|
|
+ free(exprc);
|
||
|
|
+ return TRUE;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* switch to next input file */
|
||
|
|
+static rstream *next_input_file(void)
|
||
|
|
+{
|
||
|
|
+#define rsm (G.next_input_file__rsm)
|
||
|
|
+#define files_happen (G.next_input_file__files_happen)
|
||
|
|
+
|
||
|
|
+ FILE *F;
|
||
|
|
+ const char *fname, *ind;
|
||
|
|
+
|
||
|
|
+ if (rsm.F)
|
||
|
|
+ fclose(rsm.F);
|
||
|
|
+ rsm.F = NULL;
|
||
|
|
+ rsm.pos = rsm.adv = 0;
|
||
|
|
+
|
||
|
|
+ for (;;) {
|
||
|
|
+ if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
|
||
|
|
+ if (files_happen)
|
||
|
|
+ return NULL;
|
||
|
|
+ fname = "-";
|
||
|
|
+ F = stdin;
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ ind = getvar_s(incvar(intvar[ARGIND]));
|
||
|
|
+ fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
|
||
|
|
+ if (fname && *fname && !is_assignment(fname)) {
|
||
|
|
+ F = xfopen_stdin(fname);
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ files_happen = TRUE;
|
||
|
|
+ setvar_s(intvar[FILENAME], fname);
|
||
|
|
+ rsm.F = F;
|
||
|
|
+ return &rsm;
|
||
|
|
+#undef rsm
|
||
|
|
+#undef files_happen
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/*
|
||
|
|
* Evaluate node - the heart of the program. Supplied with subtree
|
||
|
|
* and place where to store result. Returns ptr to result.
|
||
|
|
@@ -3338,64 +3392,6 @@ static int awk_exit(int r)
|
||
|
|
exit(r);
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* if expr looks like "var=value", perform assignment and return 1,
|
||
|
|
- * otherwise return 0 */
|
||
|
|
-static int is_assignment(const char *expr)
|
||
|
|
-{
|
||
|
|
- char *exprc, *val;
|
||
|
|
-
|
||
|
|
- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
|
||
|
|
- return FALSE;
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- exprc = xstrdup(expr);
|
||
|
|
- val = exprc + (val - expr);
|
||
|
|
- *val++ = '\0';
|
||
|
|
-
|
||
|
|
- unescape_string_in_place(val);
|
||
|
|
- setvar_u(newvar(exprc), val);
|
||
|
|
- free(exprc);
|
||
|
|
- return TRUE;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-/* switch to next input file */
|
||
|
|
-static rstream *next_input_file(void)
|
||
|
|
-{
|
||
|
|
-#define rsm (G.next_input_file__rsm)
|
||
|
|
-#define files_happen (G.next_input_file__files_happen)
|
||
|
|
-
|
||
|
|
- FILE *F;
|
||
|
|
- const char *fname, *ind;
|
||
|
|
-
|
||
|
|
- if (rsm.F)
|
||
|
|
- fclose(rsm.F);
|
||
|
|
- rsm.F = NULL;
|
||
|
|
- rsm.pos = rsm.adv = 0;
|
||
|
|
-
|
||
|
|
- for (;;) {
|
||
|
|
- if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
|
||
|
|
- if (files_happen)
|
||
|
|
- return NULL;
|
||
|
|
- fname = "-";
|
||
|
|
- F = stdin;
|
||
|
|
- break;
|
||
|
|
- }
|
||
|
|
- ind = getvar_s(incvar(intvar[ARGIND]));
|
||
|
|
- fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
|
||
|
|
- if (fname && *fname && !is_assignment(fname)) {
|
||
|
|
- F = xfopen_stdin(fname);
|
||
|
|
- break;
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- files_happen = TRUE;
|
||
|
|
- setvar_s(intvar[FILENAME], fname);
|
||
|
|
- rsm.F = F;
|
||
|
|
- return &rsm;
|
||
|
|
-#undef rsm
|
||
|
|
-#undef files_happen
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
||
|
|
int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
{
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From c14ab33f2d8eb07dbf27570be30121cc9734ba04 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 14:29:01 +0200
|
||
|
|
Subject: [PATCH 30/61] awk: when parsing length(), simplify eating of LPAREN
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_expr 945 948 +3
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 7 ++++---
|
||
|
|
1 file changed, 4 insertions(+), 3 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 6833c2f0d..f65449a09 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1453,10 +1453,11 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
);
|
||
|
|
- rollback_token();
|
||
|
|
- if (tc & TC_LPAREN) {
|
||
|
|
+ if (tc != TC_LPAREN)
|
||
|
|
+ rollback_token();
|
||
|
|
+ else {
|
||
|
|
/* It was a "(" token. Handle just like TC_BUILTIN */
|
||
|
|
- cn->l.n = parse_lrparen_list();
|
||
|
|
+ cn->l.n = parse_expr(TC_RPAREN);
|
||
|
|
}
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 8be97151d5ba9f98f27f58068416c203565708d0 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 14:33:13 +0200
|
||
|
|
Subject: [PATCH 31/61] awk: use "static" tmpvars in main and exit
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_exit 103 93 -10
|
||
|
|
awk_main 850 832 -18
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-28) Total: -28 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 17 +++++------------
|
||
|
|
1 file changed, 5 insertions(+), 12 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index f65449a09..9f5a94037 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -562,6 +562,8 @@ struct globals2 {
|
||
|
|
var ptest__tmpvar;
|
||
|
|
var awk_printf__tmpvar;
|
||
|
|
var as_regex__tmpvar;
|
||
|
|
+ var exit__tmpvar;
|
||
|
|
+ var main__tmpvar;
|
||
|
|
|
||
|
|
tsplitter exec_builtin__tspl;
|
||
|
|
|
||
|
|
@@ -638,11 +640,6 @@ static void syntax_error(const char *message)
|
||
|
|
bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
|
||
|
|
}
|
||
|
|
|
||
|
|
-static void zero_out_var(var *vp)
|
||
|
|
-{
|
||
|
|
- memset(vp, 0, sizeof(*vp));
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
/* ---- hash stuff ---- */
|
||
|
|
|
||
|
|
static unsigned hashidx(const char *name)
|
||
|
|
@@ -3372,11 +3369,9 @@ static int awk_exit(int r)
|
||
|
|
unsigned i;
|
||
|
|
|
||
|
|
if (!exiting) {
|
||
|
|
- var tv;
|
||
|
|
exiting = TRUE;
|
||
|
|
nextrec = FALSE;
|
||
|
|
- zero_out_var(&tv);
|
||
|
|
- evaluate(endseq.first, &tv);
|
||
|
|
+ evaluate(endseq.first, &G.exit__tmpvar);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* waiting for children */
|
||
|
|
@@ -3404,7 +3399,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
llist_t *list_e = NULL;
|
||
|
|
#endif
|
||
|
|
int i;
|
||
|
|
- var tv;
|
||
|
|
|
||
|
|
INIT_G();
|
||
|
|
|
||
|
|
@@ -3514,8 +3508,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
newfile("/dev/stdout")->F = stdout;
|
||
|
|
newfile("/dev/stderr")->F = stderr;
|
||
|
|
|
||
|
|
- zero_out_var(&tv);
|
||
|
|
- evaluate(beginseq.first, &tv);
|
||
|
|
+ evaluate(beginseq.first, &G.main__tmpvar);
|
||
|
|
if (!mainseq.first && !endseq.first)
|
||
|
|
awk_exit(EXIT_SUCCESS);
|
||
|
|
|
||
|
|
@@ -3532,7 +3525,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
nextrec = FALSE;
|
||
|
|
incvar(intvar[NR]);
|
||
|
|
incvar(intvar[FNR]);
|
||
|
|
- evaluate(mainseq.first, &tv);
|
||
|
|
+ evaluate(mainseq.first, &G.main__tmpvar);
|
||
|
|
|
||
|
|
if (nextfile)
|
||
|
|
break;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 7f4cd583daf8dcb431f07fd3402ca7ddc11b21ab Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 14:53:52 +0200
|
||
|
|
Subject: [PATCH 32/61] awk: shuffle globals for smaller offsets
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_main 832 829 -3
|
||
|
|
evaluate 3229 3223 -6
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-9) Total: -9 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 25 +++++++++++++------------
|
||
|
|
1 file changed, 13 insertions(+), 12 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 9f5a94037..068ed687b 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -536,6 +536,11 @@ struct globals {
|
||
|
|
smallint nextfile;
|
||
|
|
smallint is_f0_split;
|
||
|
|
smallint t_rollback;
|
||
|
|
+
|
||
|
|
+ /* former statics from various functions */
|
||
|
|
+ smallint next_token__concat_inserted;
|
||
|
|
+ uint32_t next_token__save_tclass;
|
||
|
|
+ uint32_t next_token__save_info;
|
||
|
|
};
|
||
|
|
struct globals2 {
|
||
|
|
uint32_t t_info; /* often used */
|
||
|
|
@@ -548,15 +553,11 @@ struct globals2 {
|
||
|
|
/* former statics from various functions */
|
||
|
|
char *split_f0__fstrings;
|
||
|
|
|
||
|
|
- uint32_t next_token__save_tclass;
|
||
|
|
- uint32_t next_token__save_info;
|
||
|
|
- smallint next_token__concat_inserted;
|
||
|
|
-
|
||
|
|
- smallint next_input_file__files_happen;
|
||
|
|
rstream next_input_file__rsm;
|
||
|
|
+ smallint next_input_file__files_happen;
|
||
|
|
|
||
|
|
- var *evaluate__fnargs;
|
||
|
|
unsigned evaluate__seed;
|
||
|
|
+ var *evaluate__fnargs;
|
||
|
|
regex_t evaluate__sreg;
|
||
|
|
|
||
|
|
var ptest__tmpvar;
|
||
|
|
@@ -575,10 +576,10 @@ struct globals2 {
|
||
|
|
#define G1 (ptr_to_globals[-1])
|
||
|
|
#define G (*(struct globals2 *)ptr_to_globals)
|
||
|
|
/* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
|
||
|
|
-/*char G1size[sizeof(G1)]; - 0x74 */
|
||
|
|
-/*char Gsize[sizeof(G)]; - 0x1c4 */
|
||
|
|
+//char G1size[sizeof(G1)]; // 0x70
|
||
|
|
+//char Gsize[sizeof(G)]; // 0x2f8
|
||
|
|
/* Trying to keep most of members accessible with short offsets: */
|
||
|
|
-/*char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; - 0x90 */
|
||
|
|
+//char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
|
||
|
|
#define t_double (G1.t_double )
|
||
|
|
#define beginseq (G1.beginseq )
|
||
|
|
#define mainseq (G1.mainseq )
|
||
|
|
@@ -1056,9 +1057,9 @@ static int istrue(var *v)
|
||
|
|
*/
|
||
|
|
static uint32_t next_token(uint32_t expected)
|
||
|
|
{
|
||
|
|
-#define concat_inserted (G.next_token__concat_inserted)
|
||
|
|
-#define save_tclass (G.next_token__save_tclass)
|
||
|
|
-#define save_info (G.next_token__save_info)
|
||
|
|
+#define concat_inserted (G1.next_token__concat_inserted)
|
||
|
|
+#define save_tclass (G1.next_token__save_tclass)
|
||
|
|
+#define save_info (G1.next_token__save_info)
|
||
|
|
|
||
|
|
char *p;
|
||
|
|
const char *tl;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 51262cc2c47f586d9478cc3c4f4977d98b36222b Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 15:19:14 +0200
|
||
|
|
Subject: [PATCH 33/61] awk: do not special-case "delete"
|
||
|
|
|
||
|
|
Rework of the previous fix:
|
||
|
|
Can use operation attributes to disable arg evaluation instead of special-casing.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
.rodata 104032 104036 +4
|
||
|
|
evaluate 3223 3215 -8
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 1/1 up/down: 4/-8) Total: -4 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 56 +++++++++++++++++++++++++--------------------------
|
||
|
|
1 file changed, 27 insertions(+), 29 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 068ed687b..a3dda6959 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -319,7 +319,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
#define xV OF_RES2
|
||
|
|
#define xS (OF_RES2 | OF_STR2)
|
||
|
|
#define Vx OF_RES1
|
||
|
|
-#define Rx (OF_RES1 | OF_NUM1 | OF_REQUIRED)
|
||
|
|
+#define Rx OF_REQUIRED
|
||
|
|
#define VV (OF_RES1 | OF_RES2)
|
||
|
|
#define Nx (OF_RES1 | OF_NUM1)
|
||
|
|
#define NV (OF_RES1 | OF_NUM1 | OF_RES2)
|
||
|
|
@@ -2750,32 +2750,6 @@ static var *evaluate(node *op, var *res)
|
||
|
|
op1 = op->l.n;
|
||
|
|
debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
|
||
|
|
|
||
|
|
- /* "delete" is special:
|
||
|
|
- * "delete array[var--]" must evaluate index expr only once,
|
||
|
|
- * must not evaluate it in "execute inevitable things" part.
|
||
|
|
- */
|
||
|
|
- if (XC(opinfo & OPCLSMASK) == XC(OC_DELETE)) {
|
||
|
|
- uint32_t info = op1->info & OPCLSMASK;
|
||
|
|
- var *v;
|
||
|
|
-
|
||
|
|
- debug_printf_eval("DELETE\n");
|
||
|
|
- if (info == OC_VAR) {
|
||
|
|
- v = op1->l.v;
|
||
|
|
- } else if (info == OC_FNARG) {
|
||
|
|
- v = &fnargs[op1->l.aidx];
|
||
|
|
- } else {
|
||
|
|
- syntax_error(EMSG_NOT_ARRAY);
|
||
|
|
- }
|
||
|
|
- if (op1->r.n) { /* array ref? */
|
||
|
|
- const char *s;
|
||
|
|
- s = getvar_s(evaluate(op1->r.n, TMPVAR0));
|
||
|
|
- hash_remove(iamarray(v), s);
|
||
|
|
- } else {
|
||
|
|
- clear_array(iamarray(v));
|
||
|
|
- }
|
||
|
|
- goto next;
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
/* execute inevitable things */
|
||
|
|
if (opinfo & OF_RES1)
|
||
|
|
L.v = evaluate(op1, TMPVAR0);
|
||
|
|
@@ -2905,7 +2879,31 @@ static var *evaluate(node *op, var *res)
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
|
||
|
|
- /* case XC( OC_DELETE ): - moved to happen before arg evaluation */
|
||
|
|
+ case XC( OC_DELETE ):
|
||
|
|
+ debug_printf_eval("DELETE\n");
|
||
|
|
+ {
|
||
|
|
+ /* "delete" is special:
|
||
|
|
+ * "delete array[var--]" must evaluate index expr only once.
|
||
|
|
+ */
|
||
|
|
+ uint32_t info = op1->info & OPCLSMASK;
|
||
|
|
+ var *v;
|
||
|
|
+
|
||
|
|
+ if (info == OC_VAR) {
|
||
|
|
+ v = op1->l.v;
|
||
|
|
+ } else if (info == OC_FNARG) {
|
||
|
|
+ v = &fnargs[op1->l.aidx];
|
||
|
|
+ } else {
|
||
|
|
+ syntax_error(EMSG_NOT_ARRAY);
|
||
|
|
+ }
|
||
|
|
+ if (op1->r.n) { /* array ref? */
|
||
|
|
+ const char *s;
|
||
|
|
+ s = getvar_s(evaluate(op1->r.n, TMPVAR0));
|
||
|
|
+ hash_remove(iamarray(v), s);
|
||
|
|
+ } else {
|
||
|
|
+ clear_array(iamarray(v));
|
||
|
|
+ }
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
|
||
|
|
case XC( OC_NEWSOURCE ):
|
||
|
|
debug_printf_eval("NEWSOURCE\n");
|
||
|
|
@@ -3342,7 +3340,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
default:
|
||
|
|
syntax_error(EMSG_POSSIBLE_ERROR);
|
||
|
|
} /* switch */
|
||
|
|
- next:
|
||
|
|
+
|
||
|
|
if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
|
||
|
|
op = op->a.n;
|
||
|
|
if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 2f36bdf0eb01846b23c1a340ff6f19fd9377ed6a Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 17:32:08 +0200
|
||
|
|
Subject: [PATCH 34/61] awk: make builtin definitions more understandable, no
|
||
|
|
code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 71 +++++++++++++++++++++++++++++++++++----------------
|
||
|
|
1 file changed, 49 insertions(+), 22 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index a3dda6959..fb841687e 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -331,8 +331,7 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
#define OPNMASK 0x007F
|
||
|
|
|
||
|
|
/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
|
||
|
|
- * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
|
||
|
|
- * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
|
||
|
|
+ * (for builtins it has different meaning)
|
||
|
|
*/
|
||
|
|
#undef P
|
||
|
|
#undef PRIMASK
|
||
|
|
@@ -430,8 +429,6 @@ static const char tokenlist[] ALIGN1 =
|
||
|
|
/* compiler adds trailing "\0" */
|
||
|
|
;
|
||
|
|
|
||
|
|
-#define OC_B OC_BUILTIN
|
||
|
|
-
|
||
|
|
static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
0,
|
||
|
|
0,
|
||
|
|
@@ -464,20 +461,43 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
OC_RETURN|Vx, OC_EXIT|Nx,
|
||
|
|
ST_WHILE,
|
||
|
|
0, /* else */
|
||
|
|
- OC_B|B_an|P(0x83), OC_B|B_co|P(0x41), OC_B|B_ls|P(0x83), OC_B|B_or|P(0x83),
|
||
|
|
- OC_B|B_rs|P(0x83), OC_B|B_xo|P(0x83),
|
||
|
|
- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
|
||
|
|
- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
|
||
|
|
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
|
||
|
|
- OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), /* OC_FBLTIN|Sx|F_le, was here */
|
||
|
|
- OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
|
||
|
|
- OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b), OC_B|B_mt|P(0x0b),
|
||
|
|
- OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
|
||
|
|
- OC_FBLTIN|Sx|F_le, /* TC_LENGTH */
|
||
|
|
- OC_GETLINE|SV|P(0),
|
||
|
|
- 0, 0,
|
||
|
|
- 0,
|
||
|
|
- 0 /* TC_END */
|
||
|
|
+// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
|
||
|
|
+// Highest byte bit pattern: nn s3s2s1 v3v2v1
|
||
|
|
+// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
|
||
|
|
+// OC_FBLTIN's are builtins with one optional argument,
|
||
|
|
+// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt
|
||
|
|
+// zero args for: rand systime
|
||
|
|
+// Do have one optional arg: fflush, srand, length
|
||
|
|
+#define OC_B OC_BUILTIN
|
||
|
|
+#define A1 P(0x40) /*one arg*/
|
||
|
|
+#define A2 P(0x80) /*two args*/
|
||
|
|
+#define A3 P(0xc0) /*three args*/
|
||
|
|
+#define __v P(1)
|
||
|
|
+#define _vv P(3)
|
||
|
|
+#define __s__v P(9)
|
||
|
|
+#define __s_vv P(0x0b)
|
||
|
|
+#define __svvv P(0x0f)
|
||
|
|
+#define _ss_vv P(0x1b)
|
||
|
|
+#define _s_vv_ P(0x16)
|
||
|
|
+#define ss_vv_ P(0x36)
|
||
|
|
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
|
||
|
|
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
|
||
|
|
+ OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
|
||
|
|
+ OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log
|
||
|
|
+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
|
||
|
|
+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
|
||
|
|
+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
|
||
|
|
+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
|
||
|
|
+ OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
|
||
|
|
+ OC_FBLTIN|Sx|F_le, // length
|
||
|
|
+ OC_GETLINE|SV, // getline
|
||
|
|
+ 0, 0, // func function
|
||
|
|
+ 0, // BEGIN
|
||
|
|
+ 0 // END
|
||
|
|
+#undef A1
|
||
|
|
+#undef A2
|
||
|
|
+#undef A3
|
||
|
|
+#undef OC_B
|
||
|
|
};
|
||
|
|
|
||
|
|
/* internal variable names and their initial values */
|
||
|
|
@@ -1630,6 +1650,7 @@ static void chain_group(void)
|
||
|
|
debug_printf_parse("%s: OC_BREAK\n", __func__);
|
||
|
|
n = chain_node(OC_EXEC);
|
||
|
|
n->a.n = break_ptr;
|
||
|
|
+//TODO: if break_ptr is NULL, syntax error (not in the loop)?
|
||
|
|
chain_expr(t_info);
|
||
|
|
break;
|
||
|
|
|
||
|
|
@@ -1637,6 +1658,7 @@ static void chain_group(void)
|
||
|
|
debug_printf_parse("%s: OC_CONTINUE\n", __func__);
|
||
|
|
n = chain_node(OC_EXEC);
|
||
|
|
n->a.n = continue_ptr;
|
||
|
|
+//TODO: if continue_ptr is NULL, syntax error (not in the loop)?
|
||
|
|
chain_expr(t_info);
|
||
|
|
break;
|
||
|
|
|
||
|
|
@@ -1799,8 +1821,8 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
return icase ? op->r.ire : op->l.re;
|
||
|
|
}
|
||
|
|
|
||
|
|
-#define TMPVAR (&G.as_regex__tmpvar)
|
||
|
|
//tmpvar = nvalloc(1);
|
||
|
|
+#define TMPVAR (&G.as_regex__tmpvar)
|
||
|
|
// We use a single "static" tmpvar (instead of on-stack or malloced one)
|
||
|
|
// to decrease memory consumption in deeply-recursive awk programs.
|
||
|
|
// The rule to work safely is to never call evaluate() while our static
|
||
|
|
@@ -2720,8 +2742,6 @@ static var *evaluate(node *op, var *res)
|
||
|
|
#define sreg (G.evaluate__sreg)
|
||
|
|
|
||
|
|
var *tmpvars;
|
||
|
|
-#define TMPVAR0 (tmpvars)
|
||
|
|
-#define TMPVAR1 (tmpvars + 1)
|
||
|
|
|
||
|
|
if (!op)
|
||
|
|
return setvar_s(res, NULL);
|
||
|
|
@@ -2729,6 +2749,8 @@ static var *evaluate(node *op, var *res)
|
||
|
|
debug_printf_eval("entered %s()\n", __func__);
|
||
|
|
|
||
|
|
tmpvars = nvalloc(2);
|
||
|
|
+#define TMPVAR0 (tmpvars)
|
||
|
|
+#define TMPVAR1 (tmpvars + 1)
|
||
|
|
|
||
|
|
while (op) {
|
||
|
|
struct {
|
||
|
|
@@ -3166,7 +3188,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
rstream *rsm;
|
||
|
|
int err = 0;
|
||
|
|
rsm = (rstream *)hash_search(fdhash, L.s);
|
||
|
|
- debug_printf_eval("OC_FBLTIN F_cl rsm:%p\n", rsm);
|
||
|
|
+ debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
|
||
|
|
if (rsm) {
|
||
|
|
debug_printf_eval("OC_FBLTIN F_cl "
|
||
|
|
"rsm->is_pipe:%d, ->F:%p\n",
|
||
|
|
@@ -3177,6 +3199,11 @@ static var *evaluate(node *op, var *res)
|
||
|
|
*/
|
||
|
|
if (rsm->F)
|
||
|
|
err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
|
||
|
|
+//TODO: fix this case:
|
||
|
|
+// $ awk 'BEGIN { print close(""); print ERRNO }'
|
||
|
|
+// -1
|
||
|
|
+// close of redirection that was never opened
|
||
|
|
+// (we print 0, 0)
|
||
|
|
free(rsm->buffer);
|
||
|
|
hash_remove(fdhash, L.s);
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 8eb26034fb7225862c73f1dfa947a5d4910a0935 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 18:28:12 +0200
|
||
|
|
Subject: [PATCH 35/61] awk: enforce simple builtins' argument number
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3215 3303 +88
|
||
|
|
.rodata 104036 104107 +71
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/0 up/down: 159/0) Total: 159 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 45 ++++++++++++++++++++++++++++-----------------
|
||
|
|
1 file changed, 28 insertions(+), 17 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index fb841687e..1925e0771 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -464,11 +464,11 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
|
||
|
|
// Highest byte bit pattern: nn s3s2s1 v3v2v1
|
||
|
|
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
|
||
|
|
-// OC_FBLTIN's are builtins with one optional argument,
|
||
|
|
-// TODO: enforce exactly one arg for: system, close, cos, sin, exp, int, log, sqrt
|
||
|
|
-// zero args for: rand systime
|
||
|
|
-// Do have one optional arg: fflush, srand, length
|
||
|
|
-#define OC_B OC_BUILTIN
|
||
|
|
+// OC_FBLTIN's are builtins with zero or one argument.
|
||
|
|
+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
|
||
|
|
+// Check for no args is present in builtins' code (not in this table): rand, systime.
|
||
|
|
+// Have one _optional_ arg: fflush, srand, length
|
||
|
|
+#define OC_B OC_BUILTIN
|
||
|
|
#define A1 P(0x40) /*one arg*/
|
||
|
|
#define A2 P(0x80) /*two args*/
|
||
|
|
#define A3 P(0xc0) /*three args*/
|
||
|
|
@@ -480,15 +480,15 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
#define _ss_vv P(0x1b)
|
||
|
|
#define _s_vv_ P(0x16)
|
||
|
|
#define ss_vv_ P(0x36)
|
||
|
|
- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
|
||
|
|
- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
|
||
|
|
- OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
|
||
|
|
- OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg, // cos exp int log
|
||
|
|
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
|
||
|
|
- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
|
||
|
|
- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
|
||
|
|
- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
|
||
|
|
- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
|
||
|
|
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
|
||
|
|
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
|
||
|
|
+ OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
|
||
|
|
+ OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log
|
||
|
|
+ OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
|
||
|
|
+ OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
|
||
|
|
+ OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
|
||
|
|
+ OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
|
||
|
|
+ OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
|
||
|
|
OC_FBLTIN|Sx|F_le, // length
|
||
|
|
OC_GETLINE|SV, // getline
|
||
|
|
0, 0, // func function
|
||
|
|
@@ -2773,8 +2773,11 @@ static var *evaluate(node *op, var *res)
|
||
|
|
debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
|
||
|
|
|
||
|
|
/* execute inevitable things */
|
||
|
|
- if (opinfo & OF_RES1)
|
||
|
|
+ if (opinfo & OF_RES1) {
|
||
|
|
+ if ((opinfo & OF_REQUIRED) && !op1)
|
||
|
|
+ syntax_error(EMSG_TOO_FEW_ARGS);
|
||
|
|
L.v = evaluate(op1, TMPVAR0);
|
||
|
|
+ }
|
||
|
|
if (opinfo & OF_STR1) {
|
||
|
|
L.s = getvar_s(L.v);
|
||
|
|
debug_printf_eval("L.s:'%s'\n", L.s);
|
||
|
|
@@ -3101,12 +3104,18 @@ static var *evaluate(node *op, var *res)
|
||
|
|
double R_d = R_d; /* for compiler */
|
||
|
|
debug_printf_eval("FBLTIN\n");
|
||
|
|
|
||
|
|
+ if (op1 && (op1->info & OPCLSMASK) == OC_COMMA)
|
||
|
|
+ /* Simple builtins take one arg maximum */
|
||
|
|
+ syntax_error("Too many arguments");
|
||
|
|
+
|
||
|
|
switch (opn) {
|
||
|
|
case F_in:
|
||
|
|
R_d = (long long)L_d;
|
||
|
|
break;
|
||
|
|
|
||
|
|
- case F_rn:
|
||
|
|
+ case F_rn: /*rand*/
|
||
|
|
+ if (op1)
|
||
|
|
+ syntax_error("Too many arguments");
|
||
|
|
R_d = (double)rand() / (double)RAND_MAX;
|
||
|
|
break;
|
||
|
|
|
||
|
|
@@ -3149,7 +3158,9 @@ static var *evaluate(node *op, var *res)
|
||
|
|
srand(seed);
|
||
|
|
break;
|
||
|
|
|
||
|
|
- case F_ti:
|
||
|
|
+ case F_ti: /*systime*/
|
||
|
|
+ if (op1)
|
||
|
|
+ syntax_error("Too many arguments");
|
||
|
|
R_d = time(NULL);
|
||
|
|
break;
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From bd554e662f7246fd1518db37049aaf9ecf61bce9 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 18:55:00 +0200
|
||
|
|
Subject: [PATCH 36/61] awk: beautify builtins table, no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 26 ++++++++++++++------------
|
||
|
|
1 file changed, 14 insertions(+), 12 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 1925e0771..8d7777ca6 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -464,11 +464,12 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
// OC_B's are builtins with enforced minimum number of arguments (two upper bits).
|
||
|
|
// Highest byte bit pattern: nn s3s2s1 v3v2v1
|
||
|
|
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
|
||
|
|
-// OC_FBLTIN's are builtins with zero or one argument.
|
||
|
|
+// OC_F's are builtins with zero or one argument.
|
||
|
|
// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
|
||
|
|
// Check for no args is present in builtins' code (not in this table): rand, systime.
|
||
|
|
// Have one _optional_ arg: fflush, srand, length
|
||
|
|
#define OC_B OC_BUILTIN
|
||
|
|
+#define OC_F OC_FBLTIN
|
||
|
|
#define A1 P(0x40) /*one arg*/
|
||
|
|
#define A2 P(0x80) /*two args*/
|
||
|
|
#define A3 P(0xc0) /*three args*/
|
||
|
|
@@ -480,17 +481,17 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
#define _ss_vv P(0x1b)
|
||
|
|
#define _s_vv_ P(0x16)
|
||
|
|
#define ss_vv_ P(0x36)
|
||
|
|
- OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
|
||
|
|
- OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
|
||
|
|
- OC_FBLTIN|Sx|Rx|F_cl,OC_FBLTIN|Sx|Rx|F_sy,OC_FBLTIN|Sx|F_ff, OC_B|B_a2|_vv|A2, // close system fflush atan2
|
||
|
|
- OC_FBLTIN|Nx|Rx|F_co,OC_FBLTIN|Nx|Rx|F_ex,OC_FBLTIN|Nx|Rx|F_in,OC_FBLTIN|Nx|Rx|F_lg,// cos exp int log
|
||
|
|
- OC_FBLTIN|F_rn, OC_FBLTIN|Nx|Rx|F_si,OC_FBLTIN|Nx|Rx|F_sq,OC_FBLTIN|Nx|F_sr, // rand sin sqrt srand
|
||
|
|
- OC_B|B_ge|_s_vv_|A3, OC_B|B_gs|ss_vv_|A2, OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
|
||
|
|
- OC_B|B_ma|__s__v|A2, OC_B|B_sp|__s_vv|A2, OC_SPRINTF, OC_B|B_su|ss_vv_|A2, // match split sprintf sub
|
||
|
|
- OC_B|B_ss|__svvv|A2, OC_FBLTIN|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
|
||
|
|
- OC_B|B_lo|__s__v|A1, OC_B|B_up|__s__v|A1, // tolower toupper
|
||
|
|
- OC_FBLTIN|Sx|F_le, // length
|
||
|
|
- OC_GETLINE|SV, // getline
|
||
|
|
+ OC_B|B_an|_vv|A2, OC_B|B_co|__v|A1, OC_B|B_ls|_vv|A2, OC_B|B_or|_vv|A2, // and compl lshift or
|
||
|
|
+ OC_B|B_rs|_vv|A2, OC_B|B_xo|_vv|A2, // rshift xor
|
||
|
|
+ OC_F|F_cl|Sx|Rx, OC_F|F_sy|Sx|Rx, OC_F|F_ff|Sx, OC_B|B_a2|_vv|A2, // close system fflush atan2
|
||
|
|
+ OC_F|F_co|Nx|Rx, OC_F|F_ex|Nx|Rx, OC_F|F_in|Nx|Rx, OC_F|F_lg|Nx|Rx, // cos exp int log
|
||
|
|
+ OC_F|F_rn, OC_F|F_si|Nx|Rx, OC_F|F_sq|Nx|Rx, OC_F|F_sr|Nx, // rand sin sqrt srand
|
||
|
|
+ OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2, // gensub gsub index /*length was here*/
|
||
|
|
+ OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF, OC_B|B_su|ss_vv_|A2,// match split sprintf sub
|
||
|
|
+ OC_B|B_ss|__svvv|A2,OC_F|F_ti, OC_B|B_ti|__s_vv, OC_B|B_mt|__s_vv, // substr systime strftime mktime
|
||
|
|
+ OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1, // tolower toupper
|
||
|
|
+ OC_F|F_le|Sx, // length
|
||
|
|
+ OC_GETLINE|SV, // getline
|
||
|
|
0, 0, // func function
|
||
|
|
0, // BEGIN
|
||
|
|
0 // END
|
||
|
|
@@ -498,6 +499,7 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
#undef A2
|
||
|
|
#undef A3
|
||
|
|
#undef OC_B
|
||
|
|
+#undef OC_F
|
||
|
|
};
|
||
|
|
|
||
|
|
/* internal variable names and their initial values */
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 2fcb86ed0176fcfe85d279d637a3d1b15ecf24bb Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 19:38:03 +0200
|
||
|
|
Subject: [PATCH 37/61] awk: rand() could return 1.0, fix this - should be in
|
||
|
|
[0,1)
|
||
|
|
|
||
|
|
While at it, make it finer-grained (63 bits of randomness)
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3303 3336 +33
|
||
|
|
.rodata 104107 104111 +4
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/0 up/down: 37/0) Total: 37 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 15 +++++++++++++--
|
||
|
|
1 file changed, 13 insertions(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 8d7777ca6..64fe81be4 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -3118,9 +3118,20 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case F_rn: /*rand*/
|
||
|
|
if (op1)
|
||
|
|
syntax_error("Too many arguments");
|
||
|
|
- R_d = (double)rand() / (double)RAND_MAX;
|
||
|
|
+ {
|
||
|
|
+#if RAND_MAX >= 0x7fffffff
|
||
|
|
+ uint32_t u = ((uint32_t)rand() << 16) ^ rand();
|
||
|
|
+ uint64_t v = ((uint64_t)rand() << 32) | u;
|
||
|
|
+ /* the above shift+or is optimized out on 32-bit arches */
|
||
|
|
+# if RAND_MAX > 0x7fffffff
|
||
|
|
+ v &= 0x7fffffffffffffffUL;
|
||
|
|
+# endif
|
||
|
|
+ R_d = (double)v / 0x8000000000000000UL;
|
||
|
|
+#else
|
||
|
|
+# error Not implemented for this value of RAND_MAX
|
||
|
|
+#endif
|
||
|
|
break;
|
||
|
|
-
|
||
|
|
+ }
|
||
|
|
case F_co:
|
||
|
|
if (ENABLE_FEATURE_AWK_LIBM) {
|
||
|
|
R_d = cos(L_d);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From c4aa325fa23237d1c9452ed2be468730d6e2c615 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 22:28:51 +0200
|
||
|
|
Subject: [PATCH 38/61] awk: fix behavior of "exit" without parameter
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3336 3339 +3
|
||
|
|
awk_exit 93 94 +1
|
||
|
|
awk_main 829 827 -2
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/1 up/down: 4/-2) Total: 2 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 40 ++++++++++++++++++++++------------------
|
||
|
|
testsuite/awk.tests | 5 +++++
|
||
|
|
2 files changed, 27 insertions(+), 18 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 64fe81be4..86cb7a95f 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -578,6 +578,8 @@ struct globals2 {
|
||
|
|
rstream next_input_file__rsm;
|
||
|
|
smallint next_input_file__files_happen;
|
||
|
|
|
||
|
|
+ smalluint exitcode;
|
||
|
|
+
|
||
|
|
unsigned evaluate__seed;
|
||
|
|
var *evaluate__fnargs;
|
||
|
|
regex_t evaluate__sreg;
|
||
|
|
@@ -655,7 +657,7 @@ static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
|
||
|
|
static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
|
||
|
|
static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
|
||
|
|
|
||
|
|
-static int awk_exit(int) NORETURN;
|
||
|
|
+static int awk_exit(void) NORETURN;
|
||
|
|
|
||
|
|
static void syntax_error(const char *message) NORETURN;
|
||
|
|
static void syntax_error(const char *message)
|
||
|
|
@@ -2779,14 +2781,14 @@ static var *evaluate(node *op, var *res)
|
||
|
|
if ((opinfo & OF_REQUIRED) && !op1)
|
||
|
|
syntax_error(EMSG_TOO_FEW_ARGS);
|
||
|
|
L.v = evaluate(op1, TMPVAR0);
|
||
|
|
- }
|
||
|
|
- if (opinfo & OF_STR1) {
|
||
|
|
- L.s = getvar_s(L.v);
|
||
|
|
- debug_printf_eval("L.s:'%s'\n", L.s);
|
||
|
|
- }
|
||
|
|
- if (opinfo & OF_NUM1) {
|
||
|
|
- L_d = getvar_i(L.v);
|
||
|
|
- debug_printf_eval("L_d:%f\n", L_d);
|
||
|
|
+ if (opinfo & OF_STR1) {
|
||
|
|
+ L.s = getvar_s(L.v);
|
||
|
|
+ debug_printf_eval("L.s:'%s'\n", L.s);
|
||
|
|
+ }
|
||
|
|
+ if (opinfo & OF_NUM1) {
|
||
|
|
+ L_d = getvar_i(L.v);
|
||
|
|
+ debug_printf_eval("L_d:%f\n", L_d);
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
/* NB: Must get string/numeric values of L (done above)
|
||
|
|
* _before_ evaluate()'ing R.v: if both L and R are $NNNs,
|
||
|
|
@@ -2799,10 +2801,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
R.v = evaluate(op->r.n, TMPVAR1);
|
||
|
|
//TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
|
||
|
|
//L.v = NULL;
|
||
|
|
- }
|
||
|
|
- if (opinfo & OF_STR2) {
|
||
|
|
- R.s = getvar_s(R.v);
|
||
|
|
- debug_printf_eval("R.s:'%s'\n", R.s);
|
||
|
|
+ if (opinfo & OF_STR2) {
|
||
|
|
+ R.s = getvar_s(R.v);
|
||
|
|
+ debug_printf_eval("R.s:'%s'\n", R.s);
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
|
||
|
|
debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
|
||
|
|
@@ -2955,7 +2957,9 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
case XC( OC_EXIT ):
|
||
|
|
debug_printf_eval("EXIT\n");
|
||
|
|
- awk_exit(L_d);
|
||
|
|
+ if (op1)
|
||
|
|
+ G.exitcode = (int)L_d;
|
||
|
|
+ awk_exit();
|
||
|
|
|
||
|
|
/* -- recursive node type -- */
|
||
|
|
|
||
|
|
@@ -3414,7 +3418,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
/* -------- main & co. -------- */
|
||
|
|
|
||
|
|
-static int awk_exit(int r)
|
||
|
|
+static int awk_exit(void)
|
||
|
|
{
|
||
|
|
unsigned i;
|
||
|
|
|
||
|
|
@@ -3435,7 +3439,7 @@ static int awk_exit(int r)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
- exit(r);
|
||
|
|
+ exit(G.exitcode);
|
||
|
|
}
|
||
|
|
|
||
|
|
int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
|
||
|
|
@@ -3560,7 +3564,7 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
|
||
|
|
evaluate(beginseq.first, &G.main__tmpvar);
|
||
|
|
if (!mainseq.first && !endseq.first)
|
||
|
|
- awk_exit(EXIT_SUCCESS);
|
||
|
|
+ awk_exit();
|
||
|
|
|
||
|
|
/* input file could already be opened in BEGIN block */
|
||
|
|
if (!iF)
|
||
|
|
@@ -3587,6 +3591,6 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
iF = next_input_file();
|
||
|
|
}
|
||
|
|
|
||
|
|
- awk_exit(EXIT_SUCCESS);
|
||
|
|
+ awk_exit();
|
||
|
|
/*return 0;*/
|
||
|
|
}
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 3c230393f..770d8ffce 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -445,4 +445,9 @@ testing 'awk $NF is empty' \
|
||
|
|
'' \
|
||
|
|
'a=====123='
|
||
|
|
|
||
|
|
+testing "awk exit N propagates through END's exit" \
|
||
|
|
+ "awk 'BEGIN { exit 42 } END { exit }'; echo \$?" \
|
||
|
|
+ "42\n" \
|
||
|
|
+ '' ''
|
||
|
|
+
|
||
|
|
exit $FAILCOUNT
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 1829a5b292a37553e8cc8f544448c591b3a7b3f6 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 23:07:21 +0200
|
||
|
|
Subject: [PATCH 39/61] awk: fix detection of VAR=VAL arguments
|
||
|
|
|
||
|
|
1NAME=VAL is not it, neither is VA.R=VAL
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
next_input_file 216 214 -2
|
||
|
|
is_assignment 115 91 -24
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-26) Total: -26 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 9 ++++-----
|
||
|
|
1 file changed, 4 insertions(+), 5 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 86cb7a95f..9f14f0f9a 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2679,7 +2679,8 @@ static int is_assignment(const char *expr)
|
||
|
|
{
|
||
|
|
char *exprc, *val;
|
||
|
|
|
||
|
|
- if (!isalnum_(*expr) || (val = strchr(expr, '=')) == NULL) {
|
||
|
|
+ val = (char*)endofname(expr);
|
||
|
|
+ if (val == (char*)expr || *val != '=') {
|
||
|
|
return FALSE;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -2699,7 +2700,6 @@ static rstream *next_input_file(void)
|
||
|
|
#define rsm (G.next_input_file__rsm)
|
||
|
|
#define files_happen (G.next_input_file__files_happen)
|
||
|
|
|
||
|
|
- FILE *F;
|
||
|
|
const char *fname, *ind;
|
||
|
|
|
||
|
|
if (rsm.F)
|
||
|
|
@@ -2712,20 +2712,19 @@ static rstream *next_input_file(void)
|
||
|
|
if (files_happen)
|
||
|
|
return NULL;
|
||
|
|
fname = "-";
|
||
|
|
- F = stdin;
|
||
|
|
+ rsm.F = stdin;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
ind = getvar_s(incvar(intvar[ARGIND]));
|
||
|
|
fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
|
||
|
|
if (fname && *fname && !is_assignment(fname)) {
|
||
|
|
- F = xfopen_stdin(fname);
|
||
|
|
+ rsm.F = xfopen_stdin(fname);
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
files_happen = TRUE;
|
||
|
|
setvar_s(intvar[FILENAME], fname);
|
||
|
|
- rsm.F = F;
|
||
|
|
return &rsm;
|
||
|
|
#undef rsm
|
||
|
|
#undef files_happen
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 2e495deee760595d6b0df37f1f9b7d1e4ecab1ed Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 23:24:52 +0200
|
||
|
|
Subject: [PATCH 40/61] awk: use smaller regmatch_t arrays, they had 2 elements
|
||
|
|
for no apparent reason
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
exec_builtin 1479 1434 -45
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 8 +++++---
|
||
|
|
1 file changed, 5 insertions(+), 3 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 9f14f0f9a..c06dd2304 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1937,7 +1937,7 @@ static int awk_split(const char *s, node *spl, char **slist)
|
||
|
|
n++; /* at least one field will be there */
|
||
|
|
do {
|
||
|
|
int l;
|
||
|
|
- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
|
||
|
|
+ regmatch_t pmatch[1];
|
||
|
|
|
||
|
|
l = strcspn(s, c+2); /* len till next NUL or \n */
|
||
|
|
if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
|
||
|
|
@@ -2166,7 +2166,7 @@ static int ptest(node *pattern)
|
||
|
|
static int awk_getline(rstream *rsm, var *v)
|
||
|
|
{
|
||
|
|
char *b;
|
||
|
|
- regmatch_t pmatch[2]; // TODO: why [2]? [1] is enough...
|
||
|
|
+ regmatch_t pmatch[1];
|
||
|
|
int size, a, p, pp = 0;
|
||
|
|
int fd, so, eo, r, rp;
|
||
|
|
char c, *m, *s;
|
||
|
|
@@ -2473,7 +2473,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
node *an[4];
|
||
|
|
var *av[4];
|
||
|
|
const char *as[4];
|
||
|
|
- regmatch_t pmatch[2];
|
||
|
|
+ regmatch_t pmatch[1];
|
||
|
|
regex_t sreg, *re;
|
||
|
|
node *spl;
|
||
|
|
uint32_t isr, info;
|
||
|
|
@@ -3533,6 +3533,8 @@ int awk_main(int argc UNUSED_PARAM, char **argv)
|
||
|
|
parse_program(llist_pop(&list_e));
|
||
|
|
}
|
||
|
|
#endif
|
||
|
|
+//FIXME: preserve order of -e and -f
|
||
|
|
+//TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
|
||
|
|
if (!(opt & (OPT_f | OPT_e))) {
|
||
|
|
if (!*argv)
|
||
|
|
bb_show_usage();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From bb55cde906cbaf136d6487ed7738003aa41b4bd5 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Fri, 2 Jul 2021 23:38:50 +0200
|
||
|
|
Subject: [PATCH 41/61] awk: move match() code out-of-line
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
exec_builtin_match - 202 +202
|
||
|
|
exec_builtin 1434 1157 -277
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 1/0 grow/shrink: 0/1 up/down: 202/-277) Total: -75 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 45 ++++++++++++++++++++++++++++-----------------
|
||
|
|
1 file changed, 28 insertions(+), 17 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index c06dd2304..96e06db25 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2465,6 +2465,30 @@ static NOINLINE int do_mktime(const char *ds)
|
||
|
|
return mktime(&then);
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Reduce stack usage in exec_builtin() by keeping match() code separate */
|
||
|
|
+static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res)
|
||
|
|
+{
|
||
|
|
+ regmatch_t pmatch[1];
|
||
|
|
+ regex_t sreg, *re;
|
||
|
|
+ int n;
|
||
|
|
+
|
||
|
|
+ re = as_regex(an1, &sreg);
|
||
|
|
+ n = regexec(re, as0, 1, pmatch, 0);
|
||
|
|
+ if (n == 0) {
|
||
|
|
+ pmatch[0].rm_so++;
|
||
|
|
+ pmatch[0].rm_eo++;
|
||
|
|
+ } else {
|
||
|
|
+ pmatch[0].rm_so = 0;
|
||
|
|
+ pmatch[0].rm_eo = -1;
|
||
|
|
+ }
|
||
|
|
+ if (re == &sreg)
|
||
|
|
+ regfree(re);
|
||
|
|
+ setvar_i(newvar("RSTART"), pmatch[0].rm_so);
|
||
|
|
+ setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
|
||
|
|
+ setvar_i(res, pmatch[0].rm_so);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Reduce stack usage in evaluate() by keeping builtins' code separate */
|
||
|
|
static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
{
|
||
|
|
#define tspl (G.exec_builtin__tspl)
|
||
|
|
@@ -2473,8 +2497,6 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
node *an[4];
|
||
|
|
var *av[4];
|
||
|
|
const char *as[4];
|
||
|
|
- regmatch_t pmatch[1];
|
||
|
|
- regex_t sreg, *re;
|
||
|
|
node *spl;
|
||
|
|
uint32_t isr, info;
|
||
|
|
int nargs;
|
||
|
|
@@ -2633,20 +2655,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case B_ma:
|
||
|
|
- re = as_regex(an[1], &sreg);
|
||
|
|
- n = regexec(re, as[0], 1, pmatch, 0);
|
||
|
|
- if (n == 0) {
|
||
|
|
- pmatch[0].rm_so++;
|
||
|
|
- pmatch[0].rm_eo++;
|
||
|
|
- } else {
|
||
|
|
- pmatch[0].rm_so = 0;
|
||
|
|
- pmatch[0].rm_eo = -1;
|
||
|
|
- }
|
||
|
|
- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
|
||
|
|
- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
|
||
|
|
- setvar_i(res, pmatch[0].rm_so);
|
||
|
|
- if (re == &sreg)
|
||
|
|
- regfree(re);
|
||
|
|
+ exec_builtin_match(an[1], as[0], res);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case B_ge:
|
||
|
|
@@ -2732,7 +2741,9 @@ static rstream *next_input_file(void)
|
||
|
|
|
||
|
|
/*
|
||
|
|
* Evaluate node - the heart of the program. Supplied with subtree
|
||
|
|
- * and place where to store result. Returns ptr to result.
|
||
|
|
+ * and "res" variable to assign the result to if we evaluate an expression.
|
||
|
|
+ * If node refers to e.g. a variable or a field, no assignment happens.
|
||
|
|
+ * Return ptr to the result (which may or may not be the "res" variable!)
|
||
|
|
*/
|
||
|
|
#define XC(n) ((n) >> 8)
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From a76f1b553545e144f5456c84398a0d98a81ff70d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 00:39:55 +0200
|
||
|
|
Subject: [PATCH 42/61] awk: rename GRPSTART/END to L/RBRACE, no code changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 60 ++++++++++++++++++++++++++++-----------------------
|
||
|
|
1 file changed, 33 insertions(+), 27 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 96e06db25..a1a2afd1d 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -211,8 +211,8 @@ typedef struct tsplitter_s {
|
||
|
|
#define TC_PIPE (1 << 9) /* input redirection pipe | */
|
||
|
|
#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
|
||
|
|
#define TC_ARRTERM (1 << 11) /* ] */
|
||
|
|
-#define TC_GRPSTART (1 << 12) /* { */
|
||
|
|
-#define TC_GRPTERM (1 << 13) /* } */
|
||
|
|
+#define TC_LBRACE (1 << 12) /* { */
|
||
|
|
+#define TC_RBRACE (1 << 13) /* } */
|
||
|
|
#define TC_SEMICOL (1 << 14) /* ; */
|
||
|
|
#define TC_NEWLINE (1 << 15)
|
||
|
|
#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
|
||
|
|
@@ -250,8 +250,8 @@ if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
|
||
|
|
if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
|
||
|
|
if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
|
||
|
|
if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
|
||
|
|
-if ((n) & TC_GRPSTART) debug_printf_parse(" GRPSTART"); \
|
||
|
|
-if ((n) & TC_GRPTERM ) debug_printf_parse(" GRPTERM" ); \
|
||
|
|
+if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \
|
||
|
|
+if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \
|
||
|
|
if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
|
||
|
|
if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
|
||
|
|
if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
|
||
|
|
@@ -291,13 +291,13 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
| TC_FUNCDECL | TC_BEGIN | TC_END)
|
||
|
|
|
||
|
|
/* discard newlines after these */
|
||
|
|
-#define TS_NOTERM (TC_COMMA | TC_GRPSTART | TC_GRPTERM \
|
||
|
|
+#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \
|
||
|
|
| TS_BINOP | TS_OPTERM)
|
||
|
|
|
||
|
|
/* what can expression begin with */
|
||
|
|
#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
|
||
|
|
/* what can group begin with */
|
||
|
|
-#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_GRPSTART)
|
||
|
|
+#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE)
|
||
|
|
|
||
|
|
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
|
||
|
|
/* operator is inserted between them */
|
||
|
|
@@ -402,8 +402,8 @@ static const char tokenlist[] ALIGN1 =
|
||
|
|
"\1|" NTC /* TC_PIPE */
|
||
|
|
"\1+" "\1-" "\1!" NTC /* TC_UOPPRE2 */
|
||
|
|
"\1]" NTC /* TC_ARRTERM */
|
||
|
|
- "\1{" NTC /* TC_GRPSTART */
|
||
|
|
- "\1}" NTC /* TC_GRPTERM */
|
||
|
|
+ "\1{" NTC /* TC_LBRACE */
|
||
|
|
+ "\1}" NTC /* TC_RBRACE */
|
||
|
|
"\1;" NTC /* TC_SEMICOL */
|
||
|
|
"\1\n" NTC /* TC_NEWLINE */
|
||
|
|
"\2if" "\2do" "\3for" "\5break" /* TC_STATX */
|
||
|
|
@@ -1471,7 +1471,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
tc = next_token(TC_LPAREN /* length(...) */
|
||
|
|
| TS_OPTERM /* length; (or newline)*/
|
||
|
|
- | TC_GRPTERM /* length } */
|
||
|
|
+ | TC_RBRACE /* length } */
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
);
|
||
|
|
@@ -1516,11 +1516,11 @@ static void chain_expr(uint32_t info)
|
||
|
|
|
||
|
|
n = chain_node(info);
|
||
|
|
|
||
|
|
- n->l.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
+ n->l.n = parse_expr(TS_OPTERM | TC_RBRACE);
|
||
|
|
if ((info & OF_REQUIRED) && !n->l.n)
|
||
|
|
syntax_error(EMSG_TOO_FEW_ARGS);
|
||
|
|
|
||
|
|
- if (t_tclass & TC_GRPTERM)
|
||
|
|
+ if (t_tclass & TC_RBRACE)
|
||
|
|
rollback_token();
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -1559,16 +1559,16 @@ static void chain_group(void)
|
||
|
|
c = next_token(TS_GRPSEQ);
|
||
|
|
} while (c & TC_NEWLINE);
|
||
|
|
|
||
|
|
- if (c & TC_GRPSTART) {
|
||
|
|
- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
|
||
|
|
- while ((c = next_token(TS_GRPSEQ | TC_GRPTERM)) != TC_GRPTERM) {
|
||
|
|
- debug_printf_parse("%s: !TC_GRPTERM\n", __func__);
|
||
|
|
+ if (c & TC_LBRACE) {
|
||
|
|
+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
+ while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
|
||
|
|
+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
|
||
|
|
if (c & TC_NEWLINE)
|
||
|
|
continue;
|
||
|
|
rollback_token();
|
||
|
|
chain_group();
|
||
|
|
}
|
||
|
|
- debug_printf_parse("%s: TC_GRPTERM\n", __func__);
|
||
|
|
+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
if (c & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
@@ -1588,7 +1588,7 @@ static void chain_group(void)
|
||
|
|
chain_group();
|
||
|
|
n2 = chain_node(OC_EXEC);
|
||
|
|
n->r.n = seq->last;
|
||
|
|
- if (next_token(TS_GRPSEQ | TC_GRPTERM | TC_ELSE) == TC_ELSE) {
|
||
|
|
+ if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
|
||
|
|
chain_group();
|
||
|
|
n2->a.n = seq->last;
|
||
|
|
} else {
|
||
|
|
@@ -1641,12 +1641,12 @@ static void chain_group(void)
|
||
|
|
case OC_PRINTF:
|
||
|
|
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
|
||
|
|
n = chain_node(t_info);
|
||
|
|
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_GRPTERM);
|
||
|
|
+ n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE);
|
||
|
|
if (t_tclass & TC_OUTRDR) {
|
||
|
|
n->info |= t_info;
|
||
|
|
- n->r.n = parse_expr(TS_OPTERM | TC_GRPTERM);
|
||
|
|
+ n->r.n = parse_expr(TS_OPTERM | TC_RBRACE);
|
||
|
|
}
|
||
|
|
- if (t_tclass & TC_GRPTERM)
|
||
|
|
+ if (t_tclass & TC_RBRACE)
|
||
|
|
rollback_token();
|
||
|
|
break;
|
||
|
|
|
||
|
|
@@ -1684,7 +1684,7 @@ static void parse_program(char *p)
|
||
|
|
|
||
|
|
g_pos = p;
|
||
|
|
t_lineno = 1;
|
||
|
|
- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_GRPSTART |
|
||
|
|
+ while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||
|
|
TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
|
||
|
|
|
||
|
|
if (tclass & TS_OPTERM) {
|
||
|
|
@@ -1696,10 +1696,14 @@ static void parse_program(char *p)
|
||
|
|
if (tclass & TC_BEGIN) {
|
||
|
|
debug_printf_parse("%s: TC_BEGIN\n", __func__);
|
||
|
|
seq = &beginseq;
|
||
|
|
+//TODO: ensure there is no newline between BEGIN and {
|
||
|
|
+//next_token(TC_LBRACE); rollback_token();
|
||
|
|
chain_group();
|
||
|
|
} else if (tclass & TC_END) {
|
||
|
|
debug_printf_parse("%s: TC_END\n", __func__);
|
||
|
|
seq = &endseq;
|
||
|
|
+//TODO: ensure there is no newline between END and {
|
||
|
|
+//next_token(TC_LBRACE); rollback_token();
|
||
|
|
chain_group();
|
||
|
|
} else if (tclass & TC_FUNCDECL) {
|
||
|
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||
|
|
@@ -1726,24 +1730,26 @@ static void parse_program(char *p)
|
||
|
|
/* it was a comma, we ate it */
|
||
|
|
}
|
||
|
|
seq = &f->body;
|
||
|
|
+//TODO: ensure there is { after "func F(...)" - but newlines are allowed
|
||
|
|
+//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
|
||
|
|
chain_group();
|
||
|
|
hash_clear(ahash);
|
||
|
|
} else if (tclass & TS_OPSEQ) {
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
cn = chain_node(OC_TEST);
|
||
|
|
- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_GRPSTART);
|
||
|
|
- if (t_tclass & TC_GRPSTART) {
|
||
|
|
- debug_printf_parse("%s: TC_GRPSTART\n", __func__);
|
||
|
|
+ cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
|
||
|
|
+ if (t_tclass & TC_LBRACE) {
|
||
|
|
+ debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_group();
|
||
|
|
} else {
|
||
|
|
- debug_printf_parse("%s: !TC_GRPSTART\n", __func__);
|
||
|
|
+ debug_printf_parse("%s: !TC_LBRACE\n", __func__);
|
||
|
|
chain_node(OC_PRINT);
|
||
|
|
}
|
||
|
|
cn->r.n = mainseq.last;
|
||
|
|
- } else /* if (tclass & TC_GRPSTART) */ {
|
||
|
|
- debug_printf_parse("%s: TC_GRPSTART(?)\n", __func__);
|
||
|
|
+ } else /* if (tclass & TC_LBRACE) */ {
|
||
|
|
+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_group();
|
||
|
|
}
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From df7698f1df2ed5a82a1558e167ba3262d1c614cb Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 01:16:48 +0200
|
||
|
|
Subject: [PATCH 43/61] awk: tighten rules in action parsing
|
||
|
|
|
||
|
|
Disallow:
|
||
|
|
BEGIN
|
||
|
|
{ action } - must start on the same line
|
||
|
|
Disallow:
|
||
|
|
func f()
|
||
|
|
print "hello" - must be in {...}
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
chain_until_rbrace - 41 +41
|
||
|
|
parse_program 307 336 +29
|
||
|
|
chain_group 649 616 -33
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 1/0 grow/shrink: 1/1 up/down: 70/-33) Total: 37 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 108 ++++++++++++++++++++++++++++++--------------------
|
||
|
|
1 file changed, 66 insertions(+), 42 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index a1a2afd1d..c68416873 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1549,29 +1549,35 @@ static node *chain_loop(node *nn)
|
||
|
|
return n;
|
||
|
|
}
|
||
|
|
|
||
|
|
+static void chain_until_rbrace(void)
|
||
|
|
+{
|
||
|
|
+ uint32_t tc;
|
||
|
|
+ while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
|
||
|
|
+ debug_printf_parse("%s: !TC_RBRACE\n", __func__);
|
||
|
|
+ if (tc == TC_NEWLINE)
|
||
|
|
+ continue;
|
||
|
|
+ rollback_token();
|
||
|
|
+ chain_group();
|
||
|
|
+ }
|
||
|
|
+ debug_printf_parse("%s: TC_RBRACE\n", __func__);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* parse group and attach it to chain */
|
||
|
|
static void chain_group(void)
|
||
|
|
{
|
||
|
|
- uint32_t c;
|
||
|
|
+ uint32_t tc;
|
||
|
|
node *n, *n2, *n3;
|
||
|
|
|
||
|
|
do {
|
||
|
|
- c = next_token(TS_GRPSEQ);
|
||
|
|
- } while (c & TC_NEWLINE);
|
||
|
|
+ tc = next_token(TS_GRPSEQ);
|
||
|
|
+ } while (tc == TC_NEWLINE);
|
||
|
|
|
||
|
|
- if (c & TC_LBRACE) {
|
||
|
|
+ if (tc == TC_LBRACE) {
|
||
|
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
- while ((c = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
|
||
|
|
- debug_printf_parse("%s: !TC_RBRACE\n", __func__);
|
||
|
|
- if (c & TC_NEWLINE)
|
||
|
|
- continue;
|
||
|
|
- rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
- }
|
||
|
|
- debug_printf_parse("%s: TC_RBRACE\n", __func__);
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
- if (c & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
+ if (tc & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
@@ -1675,37 +1681,48 @@ static void chain_group(void)
|
||
|
|
|
||
|
|
static void parse_program(char *p)
|
||
|
|
{
|
||
|
|
- uint32_t tclass;
|
||
|
|
- node *cn;
|
||
|
|
- func *f;
|
||
|
|
- var *v;
|
||
|
|
-
|
||
|
|
debug_printf_parse("%s()\n", __func__);
|
||
|
|
|
||
|
|
g_pos = p;
|
||
|
|
t_lineno = 1;
|
||
|
|
- while ((tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||
|
|
- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
|
||
|
|
+ for (;;) {
|
||
|
|
+ uint32_t tclass;
|
||
|
|
|
||
|
|
- if (tclass & TS_OPTERM) {
|
||
|
|
+ tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||
|
|
+ TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
|
||
|
|
+
|
||
|
|
+ if (tclass == TC_EOF) {
|
||
|
|
+ debug_printf_parse("%s: TC_EOF\n", __func__);
|
||
|
|
+ break;
|
||
|
|
+ }
|
||
|
|
+ if (tclass & TS_OPTERM) { /* ; or <newline> */
|
||
|
|
debug_printf_parse("%s: TS_OPTERM\n", __func__);
|
||
|
|
+//NB: gawk allows many newlines, but does not allow more than one semicolon:
|
||
|
|
+// BEGIN {...}<newline>;<newline>;
|
||
|
|
+//would complain "each rule must have a pattern or an action part".
|
||
|
|
+//Same message for
|
||
|
|
+// ; BEGIN {...}
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
-
|
||
|
|
- seq = &mainseq;
|
||
|
|
- if (tclass & TC_BEGIN) {
|
||
|
|
+ if (tclass == TC_BEGIN) {
|
||
|
|
debug_printf_parse("%s: TC_BEGIN\n", __func__);
|
||
|
|
seq = &beginseq;
|
||
|
|
-//TODO: ensure there is no newline between BEGIN and {
|
||
|
|
-//next_token(TC_LBRACE); rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
- } else if (tclass & TC_END) {
|
||
|
|
+ /* ensure there is no newline between BEGIN and { */
|
||
|
|
+ next_token(TC_LBRACE);
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (tclass == TC_END) {
|
||
|
|
debug_printf_parse("%s: TC_END\n", __func__);
|
||
|
|
seq = &endseq;
|
||
|
|
-//TODO: ensure there is no newline between END and {
|
||
|
|
-//next_token(TC_LBRACE); rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
- } else if (tclass & TC_FUNCDECL) {
|
||
|
|
+ /* ensure there is no newline between END and { */
|
||
|
|
+ next_token(TC_LBRACE);
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (tclass == TC_FUNCDECL) {
|
||
|
|
+ func *f;
|
||
|
|
+
|
||
|
|
debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
|
||
|
|
next_token(TC_FUNCTION);
|
||
|
|
f = newfunc(t_string);
|
||
|
|
@@ -1716,6 +1733,7 @@ static void parse_program(char *p)
|
||
|
|
//f->nargs = 0; - already is
|
||
|
|
/* func arg list: comma sep list of args, and a close paren */
|
||
|
|
for (;;) {
|
||
|
|
+ var *v;
|
||
|
|
if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
|
||
|
|
if (f->nargs == 0)
|
||
|
|
break; /* func() is ok */
|
||
|
|
@@ -1730,31 +1748,37 @@ static void parse_program(char *p)
|
||
|
|
/* it was a comma, we ate it */
|
||
|
|
}
|
||
|
|
seq = &f->body;
|
||
|
|
-//TODO: ensure there is { after "func F(...)" - but newlines are allowed
|
||
|
|
-//while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE) continue; rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
+ /* ensure there is { after "func F(...)" - but newlines are allowed */
|
||
|
|
+ while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
|
||
|
|
+ continue;
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
hash_clear(ahash);
|
||
|
|
- } else if (tclass & TS_OPSEQ) {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ seq = &mainseq;
|
||
|
|
+ if (tclass & TS_OPSEQ) {
|
||
|
|
+ node *cn;
|
||
|
|
+
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
cn = chain_node(OC_TEST);
|
||
|
|
cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
|
||
|
|
- if (t_tclass & TC_LBRACE) {
|
||
|
|
+ if (t_tclass == TC_LBRACE) {
|
||
|
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_group();
|
||
|
|
} else {
|
||
|
|
+ /* no action, assume default "{ print }" */
|
||
|
|
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
|
||
|
|
chain_node(OC_PRINT);
|
||
|
|
}
|
||
|
|
cn->r.n = mainseq.last;
|
||
|
|
- } else /* if (tclass & TC_LBRACE) */ {
|
||
|
|
- debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||
|
|
- rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
+ continue;
|
||
|
|
}
|
||
|
|
+ /* tclass == TC_LBRACE */
|
||
|
|
+ debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
}
|
||
|
|
- debug_printf_parse("%s: TC_EOF\n", __func__);
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From bebe1432529281f66d2004e07194718a47207d5d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 01:32:03 +0200
|
||
|
|
Subject: [PATCH 44/61] awk: open-code TS_OPTERM, no logic changes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 33 +++++++++++++++++----------------
|
||
|
|
1 file changed, 17 insertions(+), 16 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index c68416873..8c471d693 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -283,7 +283,6 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
|
||
|
|
#define TS_LVALUE (TC_VARIABLE | TC_ARRAY)
|
||
|
|
#define TS_STATEMNT (TC_STATX | TC_WHILE)
|
||
|
|
-#define TS_OPTERM (TC_SEMICOL | TC_NEWLINE)
|
||
|
|
|
||
|
|
/* word tokens, cannot mean something else if not expected */
|
||
|
|
#define TS_WORD (TC_IN | TS_STATEMNT | TC_ELSE \
|
||
|
|
@@ -291,13 +290,14 @@ if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
| TC_FUNCDECL | TC_BEGIN | TC_END)
|
||
|
|
|
||
|
|
/* discard newlines after these */
|
||
|
|
-#define TS_NOTERM (TC_COMMA | TC_LBRACE | TC_RBRACE \
|
||
|
|
- | TS_BINOP | TS_OPTERM)
|
||
|
|
+#define TS_NOTERM (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
|
||
|
|
+ | TC_SEMICOL | TC_NEWLINE)
|
||
|
|
|
||
|
|
/* what can expression begin with */
|
||
|
|
#define TS_OPSEQ (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
|
||
|
|
/* what can group begin with */
|
||
|
|
-#define TS_GRPSEQ (TS_OPSEQ | TS_OPTERM | TS_STATEMNT | TC_LBRACE)
|
||
|
|
+#define TS_GRPSEQ (TS_OPSEQ | TS_STATEMNT \
|
||
|
|
+ | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
|
||
|
|
|
||
|
|
/* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
|
||
|
|
/* operator is inserted between them */
|
||
|
|
@@ -642,7 +642,7 @@ struct globals2 {
|
||
|
|
#define g_buf (G.g_buf )
|
||
|
|
#define INIT_G() do { \
|
||
|
|
SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
|
||
|
|
- t_tclass = TS_OPTERM; \
|
||
|
|
+ t_tclass = TC_NEWLINE; \
|
||
|
|
G.evaluate__seed = 1; \
|
||
|
|
} while (0)
|
||
|
|
|
||
|
|
@@ -1090,7 +1090,7 @@ static uint32_t next_token(uint32_t expected)
|
||
|
|
const uint32_t *ti;
|
||
|
|
uint32_t tc, last_token_class;
|
||
|
|
|
||
|
|
- last_token_class = t_tclass; /* t_tclass is initialized to TS_OPTERM */
|
||
|
|
+ last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
|
||
|
|
|
||
|
|
debug_printf_parse("%s() expected(%x):", __func__, expected);
|
||
|
|
debug_parse_print_tc(expected);
|
||
|
|
@@ -1470,7 +1470,8 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
case TC_LENGTH:
|
||
|
|
debug_printf_parse("%s: TC_LENGTH\n", __func__);
|
||
|
|
tc = next_token(TC_LPAREN /* length(...) */
|
||
|
|
- | TS_OPTERM /* length; (or newline)*/
|
||
|
|
+ | TC_SEMICOL /* length; */
|
||
|
|
+ | TC_NEWLINE /* length<newline> */
|
||
|
|
| TC_RBRACE /* length } */
|
||
|
|
| TC_BINOPX /* length <op> NUM */
|
||
|
|
| TC_COMMA /* print length, 1 */
|
||
|
|
@@ -1516,7 +1517,7 @@ static void chain_expr(uint32_t info)
|
||
|
|
|
||
|
|
n = chain_node(info);
|
||
|
|
|
||
|
|
- n->l.n = parse_expr(TS_OPTERM | TC_RBRACE);
|
||
|
|
+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
|
||
|
|
if ((info & OF_REQUIRED) && !n->l.n)
|
||
|
|
syntax_error(EMSG_TOO_FEW_ARGS);
|
||
|
|
|
||
|
|
@@ -1577,8 +1578,8 @@ static void chain_group(void)
|
||
|
|
chain_until_rbrace();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
- if (tc & (TS_OPSEQ | TS_OPTERM)) {
|
||
|
|
- debug_printf_parse("%s: TS_OPSEQ | TS_OPTERM\n", __func__);
|
||
|
|
+ if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
return;
|
||
|
|
@@ -1647,10 +1648,10 @@ static void chain_group(void)
|
||
|
|
case OC_PRINTF:
|
||
|
|
debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
|
||
|
|
n = chain_node(t_info);
|
||
|
|
- n->l.n = parse_expr(TS_OPTERM | TC_OUTRDR | TC_RBRACE);
|
||
|
|
+ n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
|
||
|
|
if (t_tclass & TC_OUTRDR) {
|
||
|
|
n->info |= t_info;
|
||
|
|
- n->r.n = parse_expr(TS_OPTERM | TC_RBRACE);
|
||
|
|
+ n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
|
||
|
|
}
|
||
|
|
if (t_tclass & TC_RBRACE)
|
||
|
|
rollback_token();
|
||
|
|
@@ -1689,14 +1690,14 @@ static void parse_program(char *p)
|
||
|
|
uint32_t tclass;
|
||
|
|
|
||
|
|
tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||
|
|
- TS_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL);
|
||
|
|
+ TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
|
||
|
|
|
||
|
|
if (tclass == TC_EOF) {
|
||
|
|
debug_printf_parse("%s: TC_EOF\n", __func__);
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
- if (tclass & TS_OPTERM) { /* ; or <newline> */
|
||
|
|
- debug_printf_parse("%s: TS_OPTERM\n", __func__);
|
||
|
|
+ if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
|
||
|
|
+ debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
|
||
|
|
//NB: gawk allows many newlines, but does not allow more than one semicolon:
|
||
|
|
// BEGIN {...}<newline>;<newline>;
|
||
|
|
//would complain "each rule must have a pattern or an action part".
|
||
|
|
@@ -1762,7 +1763,7 @@ static void parse_program(char *p)
|
||
|
|
debug_printf_parse("%s: TS_OPSEQ\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
cn = chain_node(OC_TEST);
|
||
|
|
- cn->l.n = parse_expr(TS_OPTERM | TC_EOF | TC_LBRACE);
|
||
|
|
+ cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
|
||
|
|
if (t_tclass == TC_LBRACE) {
|
||
|
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From be80050f2cff5967de7a50eb3aed2f95c39357cd Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 01:59:36 +0200
|
||
|
|
Subject: [PATCH 45/61] awk: support %F %a %A in printf
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
.rodata 104111 104120 +9
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 2 +-
|
||
|
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 8c471d693..2c3b49bc8 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -909,7 +909,7 @@ static int fmt_num(char *b, int size, const char *format, double n, int int_as_i
|
||
|
|
do { c = *s; } while (c && *++s);
|
||
|
|
if (strchr("diouxX", c)) {
|
||
|
|
r = snprintf(b, size, format, (int)n);
|
||
|
|
- } else if (strchr("eEfgG", c)) {
|
||
|
|
+ } else if (strchr("eEfFgGaA", c)) {
|
||
|
|
r = snprintf(b, size, format, n);
|
||
|
|
} else {
|
||
|
|
syntax_error(EMSG_INV_FMT);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 8b97bd49bdd5181c211f5d7b64108edf9e8962f4 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 11:54:01 +0200
|
||
|
|
Subject: [PATCH 46/61] awk: do not use a copy of g_progname for
|
||
|
|
node->l.new_progname
|
||
|
|
|
||
|
|
We never destroy g_progname's, the strings still exist, no need to copy
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
chain_node 104 97 -7
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 4 ++--
|
||
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 2c3b49bc8..4119253ec 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -179,7 +179,7 @@ typedef struct node_s {
|
||
|
|
struct node_s *n;
|
||
|
|
var *v;
|
||
|
|
int aidx;
|
||
|
|
- char *new_progname;
|
||
|
|
+ const char *new_progname;
|
||
|
|
regex_t *re;
|
||
|
|
} l;
|
||
|
|
union {
|
||
|
|
@@ -1501,7 +1501,7 @@ static node *chain_node(uint32_t info)
|
||
|
|
if (seq->programname != g_progname) {
|
||
|
|
seq->programname = g_progname;
|
||
|
|
n = chain_node(OC_NEWSOURCE);
|
||
|
|
- n->l.new_progname = xstrdup(g_progname);
|
||
|
|
+ n->l.new_progname = g_progname;
|
||
|
|
}
|
||
|
|
|
||
|
|
n = seq->last;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 61dc1b3f2201368a310b0754a74e6152fe6b015d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 11:57:59 +0200
|
||
|
|
Subject: [PATCH 47/61] awk: rand(): 64-bit constants should be ULL
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 4 ++--
|
||
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 4119253ec..e4dd6684c 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -3169,9 +3169,9 @@ static var *evaluate(node *op, var *res)
|
||
|
|
uint64_t v = ((uint64_t)rand() << 32) | u;
|
||
|
|
/* the above shift+or is optimized out on 32-bit arches */
|
||
|
|
# if RAND_MAX > 0x7fffffff
|
||
|
|
- v &= 0x7fffffffffffffffUL;
|
||
|
|
+ v &= 0x7fffffffffffffffULL;
|
||
|
|
# endif
|
||
|
|
- R_d = (double)v / 0x8000000000000000UL;
|
||
|
|
+ R_d = (double)v / 0x8000000000000000ULL;
|
||
|
|
#else
|
||
|
|
# error Not implemented for this value of RAND_MAX
|
||
|
|
#endif
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From a6468234691fb0718fa0d57b9de4a7748f805af9 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 12:20:36 +0200
|
||
|
|
Subject: [PATCH 48/61] awk: match(): code shrink
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
do_match - 165 +165
|
||
|
|
exec_builtin_match 202 - -202
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 1/1 grow/shrink: 0/0 up/down: 165/-202) Total: -37 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 24 +++++++++++-------------
|
||
|
|
1 file changed, 11 insertions(+), 13 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index e4dd6684c..649198d15 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2497,26 +2497,24 @@ static NOINLINE int do_mktime(const char *ds)
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Reduce stack usage in exec_builtin() by keeping match() code separate */
|
||
|
|
-static NOINLINE void exec_builtin_match(node *an1, const char *as0, var *res)
|
||
|
|
+static NOINLINE var *do_match(node *an1, const char *as0)
|
||
|
|
{
|
||
|
|
regmatch_t pmatch[1];
|
||
|
|
regex_t sreg, *re;
|
||
|
|
- int n;
|
||
|
|
+ int n, start, len;
|
||
|
|
|
||
|
|
re = as_regex(an1, &sreg);
|
||
|
|
n = regexec(re, as0, 1, pmatch, 0);
|
||
|
|
- if (n == 0) {
|
||
|
|
- pmatch[0].rm_so++;
|
||
|
|
- pmatch[0].rm_eo++;
|
||
|
|
- } else {
|
||
|
|
- pmatch[0].rm_so = 0;
|
||
|
|
- pmatch[0].rm_eo = -1;
|
||
|
|
- }
|
||
|
|
if (re == &sreg)
|
||
|
|
regfree(re);
|
||
|
|
- setvar_i(newvar("RSTART"), pmatch[0].rm_so);
|
||
|
|
- setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
|
||
|
|
- setvar_i(res, pmatch[0].rm_so);
|
||
|
|
+ start = 0;
|
||
|
|
+ len = -1;
|
||
|
|
+ if (n == 0) {
|
||
|
|
+ start = pmatch[0].rm_so + 1;
|
||
|
|
+ len = pmatch[0].rm_eo - pmatch[0].rm_so;
|
||
|
|
+ }
|
||
|
|
+ setvar_i(newvar("RLENGTH"), len);
|
||
|
|
+ return setvar_i(newvar("RSTART"), start);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Reduce stack usage in evaluate() by keeping builtins' code separate */
|
||
|
|
@@ -2686,7 +2684,7 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
break;
|
||
|
|
|
||
|
|
case B_ma:
|
||
|
|
- exec_builtin_match(an[1], as[0], res);
|
||
|
|
+ res = do_match(an[1], as[0]);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case B_ge:
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 9642f8123d92f8a1db9078178b04d22015d5e03a Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 13:29:32 +0200
|
||
|
|
Subject: [PATCH 49/61] awk: restore strdup elision optimization in assignment
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3339 3387 +48
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 25 +++++++++++++++++--------
|
||
|
|
1 file changed, 17 insertions(+), 8 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 649198d15..20672db9a 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -102,7 +102,7 @@ enum {
|
||
|
|
#define VF_USER 0x0200 /* 1 = user input (may be numeric string) */
|
||
|
|
#define VF_SPECIAL 0x0400 /* 1 = requires extra handling when changed */
|
||
|
|
#define VF_WALK 0x0800 /* 1 = variable has alloc'd x.walker list */
|
||
|
|
-#define VF_FSTR 0x1000 /* 1 = var::string points to fstring buffer */
|
||
|
|
+#define VF_FSTR 0x1000 /* 1 = don't free() var::string (not malloced, or is owned by something else) */
|
||
|
|
#define VF_CHILD 0x2000 /* 1 = function arg; x.parent points to source */
|
||
|
|
#define VF_DIRTY 0x4000 /* 1 = variable was set explicitly */
|
||
|
|
|
||
|
|
@@ -1371,6 +1371,12 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
cn->a.n = vn->a.n;
|
||
|
|
if (tc & TS_BINOP) {
|
||
|
|
cn->l.n = vn;
|
||
|
|
+//FIXME: this is the place to detect and reject assignments to non-lvalues.
|
||
|
|
+//Currently we allow "assignments" to consts and temporaries, nonsense like this:
|
||
|
|
+// awk 'BEGIN { "qwe" = 1 }'
|
||
|
|
+// awk 'BEGIN { 7 *= 7 }'
|
||
|
|
+// awk 'BEGIN { length("qwe") = 1 }'
|
||
|
|
+// awk 'BEGIN { (1+1) += 3 }'
|
||
|
|
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
if ((t_info & OPCLSMASK) == OC_PGETLINE) {
|
||
|
|
/* it's a pipe */
|
||
|
|
@@ -3043,14 +3049,17 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_MOVE ):
|
||
|
|
debug_printf_eval("MOVE\n");
|
||
|
|
/* if source is a temporary string, jusk relink it to dest */
|
||
|
|
-//Disabled: if R.v is numeric but happens to have cached R.v->string,
|
||
|
|
-//then L.v ends up being a string, which is wrong
|
||
|
|
-// if (R.v == TMPVAR1 && R.v->string) {
|
||
|
|
-// res = setvar_p(L.v, R.v->string);
|
||
|
|
-// R.v->string = NULL;
|
||
|
|
-// } else {
|
||
|
|
+ if (R.v == TMPVAR1
|
||
|
|
+ && !(R.v->type & VF_NUMBER)
|
||
|
|
+ /* Why check !NUMBER? if R.v is a number but has cached R.v->string,
|
||
|
|
+ * L.v ends up a string, which is wrong */
|
||
|
|
+ /*&& R.v->string - always not NULL (right?) */
|
||
|
|
+ ) {
|
||
|
|
+ res = setvar_p(L.v, R.v->string); /* avoids strdup */
|
||
|
|
+ R.v->string = NULL;
|
||
|
|
+ } else {
|
||
|
|
res = copyvar(L.v, R.v);
|
||
|
|
-// }
|
||
|
|
+ }
|
||
|
|
break;
|
||
|
|
|
||
|
|
case XC( OC_TERNARY ):
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From c49ba79e1ce45367a1d994b12d972daae0698beb Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sat, 3 Jul 2021 13:57:47 +0200
|
||
|
|
Subject: [PATCH 50/61] awk: simplify tests for operation class
|
||
|
|
|
||
|
|
Usually, an operation class has only one possible value of "info" word.
|
||
|
|
In this case, just compare the entire info word, do not bother
|
||
|
|
to mask OPCLSMASK bits.
|
||
|
|
|
||
|
|
(Example where this is not the case: OC_REPLACE for "<op>=")
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
mk_splitter 106 100 -6
|
||
|
|
chain_group 616 610 -6
|
||
|
|
nextarg 40 32 -8
|
||
|
|
exec_builtin 1157 1149 -8
|
||
|
|
as_regex 111 103 -8
|
||
|
|
awk_split 553 543 -10
|
||
|
|
parse_expr 948 936 -12
|
||
|
|
awk_getline 656 642 -14
|
||
|
|
evaluate 3387 3343 -44
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 0/9 up/down: 0/-116) Total: -116 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 64 +++++++++++++++++++++++++++++----------------------
|
||
|
|
1 file changed, 36 insertions(+), 28 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 20672db9a..cd135ef64 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -432,7 +432,8 @@ static const char tokenlist[] ALIGN1 =
|
||
|
|
static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
0,
|
||
|
|
0,
|
||
|
|
- OC_REGEXP,
|
||
|
|
+#define TI_REGEXP OC_REGEXP
|
||
|
|
+ TI_REGEXP,
|
||
|
|
xS|'a', xS|'w', xS|'|',
|
||
|
|
OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
|
||
|
|
#define TI_PREINC (OC_UNARY|xV|P(9)|'P')
|
||
|
|
@@ -443,12 +444,17 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
|
||
|
|
OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
|
||
|
|
OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
|
||
|
|
-#define TI_LESS (OC_COMPARE|VV|P(39)|2)
|
||
|
|
+#define TI_LESS (OC_COMPARE|VV|P(39)|2)
|
||
|
|
TI_LESS, OC_MATCH|Sx|P(45)|'!', OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
|
||
|
|
- OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?', OC_COLON|xx|P(67)|':',
|
||
|
|
- OC_IN|SV|P(49), /* TC_IN */
|
||
|
|
- OC_COMMA|SS|P(80),
|
||
|
|
- OC_PGETLINE|SV|P(37),
|
||
|
|
+#define TI_TERNARY (OC_TERNARY|Vx|P(64)|'?')
|
||
|
|
+#define TI_COLON (OC_COLON|xx|P(67)|':')
|
||
|
|
+ OC_LOR|Vx|P(59), TI_TERNARY, TI_COLON,
|
||
|
|
+#define TI_IN (OC_IN|SV|P(49))
|
||
|
|
+ TI_IN,
|
||
|
|
+#define TI_COMMA (OC_COMMA|SS|P(80))
|
||
|
|
+ TI_COMMA,
|
||
|
|
+#define TI_PGETLINE (OC_PGETLINE|SV|P(37))
|
||
|
|
+ TI_PGETLINE,
|
||
|
|
OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-', OC_UNARY|xV|P(19)|'!',
|
||
|
|
0, /* ] */
|
||
|
|
0,
|
||
|
|
@@ -456,7 +462,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
0,
|
||
|
|
0, /* \n */
|
||
|
|
ST_IF, ST_DO, ST_FOR, OC_BREAK,
|
||
|
|
- OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
|
||
|
|
+#define TI_PRINT OC_PRINT
|
||
|
|
+ OC_CONTINUE, OC_DELETE|Rx, TI_PRINT,
|
||
|
|
OC_PRINTF, OC_NEXT, OC_NEXTFILE,
|
||
|
|
OC_RETURN|Vx, OC_EXIT|Nx,
|
||
|
|
ST_WHILE,
|
||
|
|
@@ -465,8 +472,8 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
// Highest byte bit pattern: nn s3s2s1 v3v2v1
|
||
|
|
// nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
|
||
|
|
// OC_F's are builtins with zero or one argument.
|
||
|
|
-// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt.
|
||
|
|
-// Check for no args is present in builtins' code (not in this table): rand, systime.
|
||
|
|
+// |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
|
||
|
|
+// Check for no args is present in builtins' code (not in this table): rand, systime
|
||
|
|
// Have one _optional_ arg: fflush, srand, length
|
||
|
|
#define OC_B OC_BUILTIN
|
||
|
|
#define OC_F OC_FBLTIN
|
||
|
|
@@ -1310,7 +1317,7 @@ static node *new_node(uint32_t info)
|
||
|
|
|
||
|
|
static void mk_re_node(const char *s, node *n, regex_t *re)
|
||
|
|
{
|
||
|
|
- n->info = OC_REGEXP;
|
||
|
|
+ n->info = TI_REGEXP;
|
||
|
|
n->l.re = re;
|
||
|
|
n->r.ire = re + 1;
|
||
|
|
xregcomp(re, s, REG_EXTENDED);
|
||
|
|
@@ -1360,12 +1367,13 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
* previous operators with higher priority */
|
||
|
|
vn = cn;
|
||
|
|
while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
|
||
|
|
- || ((t_info == vn->info) && ((t_info & OPCLSMASK) == OC_COLON))
|
||
|
|
+ || ((t_info == vn->info) && t_info == TI_COLON)
|
||
|
|
) {
|
||
|
|
vn = vn->a.n;
|
||
|
|
if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
}
|
||
|
|
- if ((t_info & OPCLSMASK) == OC_TERNARY)
|
||
|
|
+ if (t_info == TI_TERNARY)
|
||
|
|
+//TODO: why?
|
||
|
|
t_info += P(6);
|
||
|
|
cn = vn->a.n->r.n = new_node(t_info);
|
||
|
|
cn->a.n = vn->a.n;
|
||
|
|
@@ -1378,7 +1386,7 @@ static node *parse_expr(uint32_t term_tc)
|
||
|
|
// awk 'BEGIN { length("qwe") = 1 }'
|
||
|
|
// awk 'BEGIN { (1+1) += 3 }'
|
||
|
|
expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
|
||
|
|
- if ((t_info & OPCLSMASK) == OC_PGETLINE) {
|
||
|
|
+ if (t_info == TI_PGETLINE) {
|
||
|
|
/* it's a pipe */
|
||
|
|
next_token(TC_GETLINE);
|
||
|
|
/* give maximum priority to this pipe */
|
||
|
|
@@ -1630,7 +1638,7 @@ static void chain_group(void)
|
||
|
|
next_token(TC_LPAREN);
|
||
|
|
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||
|
|
if (t_tclass & TC_RPAREN) { /* for-in */
|
||
|
|
- if (!n2 || (n2->info & OPCLSMASK) != OC_IN)
|
||
|
|
+ if (!n2 || n2->info != TI_IN)
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
n = chain_node(OC_WALKINIT | VV);
|
||
|
|
n->l.n = n2->l.n;
|
||
|
|
@@ -1834,7 +1842,7 @@ static node *mk_splitter(const char *s, tsplitter *spl)
|
||
|
|
re = &spl->re[0];
|
||
|
|
ire = &spl->re[1];
|
||
|
|
n = &spl->n;
|
||
|
|
- if ((n->info & OPCLSMASK) == OC_REGEXP) {
|
||
|
|
+ if (n->info == TI_REGEXP) {
|
||
|
|
regfree(re);
|
||
|
|
regfree(ire); // TODO: nuke ire, use re+1?
|
||
|
|
}
|
||
|
|
@@ -1858,7 +1866,7 @@ static regex_t *as_regex(node *op, regex_t *preg)
|
||
|
|
int cflags;
|
||
|
|
const char *s;
|
||
|
|
|
||
|
|
- if ((op->info & OPCLSMASK) == OC_REGEXP) {
|
||
|
|
+ if (op->info == TI_REGEXP) {
|
||
|
|
return icase ? op->r.ire : op->l.re;
|
||
|
|
}
|
||
|
|
|
||
|
|
@@ -1968,7 +1976,7 @@ static int awk_split(const char *s, node *spl, char **slist)
|
||
|
|
c[2] = '\n';
|
||
|
|
|
||
|
|
n = 0;
|
||
|
|
- if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
|
||
|
|
+ if (spl->info == TI_REGEXP) { /* regex split */
|
||
|
|
if (!*s)
|
||
|
|
return n; /* "": zero fields */
|
||
|
|
n++; /* at least one field will be there */
|
||
|
|
@@ -2135,7 +2143,7 @@ static node *nextarg(node **pn)
|
||
|
|
node *n;
|
||
|
|
|
||
|
|
n = *pn;
|
||
|
|
- if (n && (n->info & OPCLSMASK) == OC_COMMA) {
|
||
|
|
+ if (n && n->info == TI_COMMA) {
|
||
|
|
*pn = n->r.n;
|
||
|
|
n = n->l.n;
|
||
|
|
} else {
|
||
|
|
@@ -2229,7 +2237,7 @@ static int awk_getline(rstream *rsm, var *v)
|
||
|
|
so = eo = p;
|
||
|
|
r = 1;
|
||
|
|
if (p > 0) {
|
||
|
|
- if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
|
||
|
|
+ if (rsplitter.n.info == TI_REGEXP) {
|
||
|
|
if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
|
||
|
|
b, 1, pmatch, 0) == 0) {
|
||
|
|
so = pmatch[0].rm_so;
|
||
|
|
@@ -2575,8 +2583,8 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
char *s, *s1;
|
||
|
|
|
||
|
|
if (nargs > 2) {
|
||
|
|
- spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
|
||
|
|
- an[2] : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
|
||
|
|
+ spl = (an[2]->info == TI_REGEXP) ? an[2]
|
||
|
|
+ : mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
|
||
|
|
} else {
|
||
|
|
spl = &fsplitter.n;
|
||
|
|
}
|
||
|
|
@@ -2860,7 +2868,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
/* test pattern */
|
||
|
|
case XC( OC_TEST ):
|
||
|
|
debug_printf_eval("TEST\n");
|
||
|
|
- if ((op1->info & OPCLSMASK) == OC_COMMA) {
|
||
|
|
+ if (op1->info == TI_COMMA) {
|
||
|
|
/* it's range pattern */
|
||
|
|
if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
|
||
|
|
op->info |= OF_CHECKED;
|
||
|
|
@@ -2921,7 +2929,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
F = rsm->F;
|
||
|
|
}
|
||
|
|
|
||
|
|
- if ((opinfo & OPCLSMASK) == OC_PRINT) {
|
||
|
|
+ if (opinfo == TI_PRINT) {
|
||
|
|
if (!op1) {
|
||
|
|
fputs(getvar_s(intvar[F0]), F);
|
||
|
|
} else {
|
||
|
|
@@ -2940,7 +2948,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
fputs(getvar_s(intvar[ORS]), F);
|
||
|
|
- } else { /* OC_PRINTF */
|
||
|
|
+ } else { /* PRINTF */
|
||
|
|
char *s = awk_printf(op1, &len);
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
fwrite(s, len, 1, F);
|
||
|
|
@@ -3064,7 +3072,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
|
||
|
|
case XC( OC_TERNARY ):
|
||
|
|
debug_printf_eval("TERNARY\n");
|
||
|
|
- if ((op->r.n->info & OPCLSMASK) != OC_COLON)
|
||
|
|
+ if (op->r.n->info != TI_COLON)
|
||
|
|
syntax_error(EMSG_POSSIBLE_ERROR);
|
||
|
|
res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
|
||
|
|
break;
|
||
|
|
@@ -3122,7 +3130,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
if (op1) {
|
||
|
|
rsm = newfile(L.s);
|
||
|
|
if (!rsm->F) {
|
||
|
|
- if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
|
||
|
|
+ if (opinfo == TI_PGETLINE) {
|
||
|
|
rsm->F = popen(L.s, "r");
|
||
|
|
rsm->is_pipe = TRUE;
|
||
|
|
} else {
|
||
|
|
@@ -3158,7 +3166,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
double R_d = R_d; /* for compiler */
|
||
|
|
debug_printf_eval("FBLTIN\n");
|
||
|
|
|
||
|
|
- if (op1 && (op1->info & OPCLSMASK) == OC_COMMA)
|
||
|
|
+ if (op1 && op1->info == TI_COMMA)
|
||
|
|
/* Simple builtins take one arg maximum */
|
||
|
|
syntax_error("Too many arguments");
|
||
|
|
|
||
|
|
@@ -3358,7 +3366,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
case XC( OC_COMMA ): {
|
||
|
|
const char *sep = "";
|
||
|
|
debug_printf_eval("COMMA\n");
|
||
|
|
- if ((opinfo & OPCLSMASK) == OC_COMMA)
|
||
|
|
+ if (opinfo == TI_COMMA)
|
||
|
|
sep = getvar_s(intvar[SUBSEP]);
|
||
|
|
setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
|
||
|
|
break;
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 39122ab01367775898f3f46394942138176b4101 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 4 Jul 2021 01:25:34 +0200
|
||
|
|
Subject: [PATCH 51/61] awk: fix printf buffer overflow
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_printf 468 546 +78
|
||
|
|
fmt_num 239 247 +8
|
||
|
|
getvar_s 125 111 -14
|
||
|
|
evaluate 3343 3329 -14
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/2 up/down: 86/-28) Total: 58 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 94 ++++++++++++++++++++++++++++++---------------------
|
||
|
|
1 file changed, 55 insertions(+), 39 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index cd135ef64..a440a6234 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -904,25 +904,23 @@ static double my_strtod(char **pp)
|
||
|
|
|
||
|
|
/* -------- working with variables (set/get/copy/etc) -------- */
|
||
|
|
|
||
|
|
-static int fmt_num(char *b, int size, const char *format, double n, int int_as_int)
|
||
|
|
+static void fmt_num(const char *format, double n)
|
||
|
|
{
|
||
|
|
- int r = 0;
|
||
|
|
- char c;
|
||
|
|
- const char *s = format;
|
||
|
|
-
|
||
|
|
- if (int_as_int && n == (long long)n) {
|
||
|
|
- r = snprintf(b, size, "%lld", (long long)n);
|
||
|
|
+ if (n == (long long)n) {
|
||
|
|
+ snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
|
||
|
|
} else {
|
||
|
|
+ const char *s = format;
|
||
|
|
+ char c;
|
||
|
|
+
|
||
|
|
do { c = *s; } while (c && *++s);
|
||
|
|
if (strchr("diouxX", c)) {
|
||
|
|
- r = snprintf(b, size, format, (int)n);
|
||
|
|
+ snprintf(g_buf, MAXVARFMT, format, (int)n);
|
||
|
|
} else if (strchr("eEfFgGaA", c)) {
|
||
|
|
- r = snprintf(b, size, format, n);
|
||
|
|
+ snprintf(g_buf, MAXVARFMT, format, n);
|
||
|
|
} else {
|
||
|
|
syntax_error(EMSG_INV_FMT);
|
||
|
|
}
|
||
|
|
}
|
||
|
|
- return r;
|
||
|
|
}
|
||
|
|
|
||
|
|
static xhash *iamarray(var *a)
|
||
|
|
@@ -999,7 +997,7 @@ static const char *getvar_s(var *v)
|
||
|
|
{
|
||
|
|
/* if v is numeric and has no cached string, convert it to string */
|
||
|
|
if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
|
||
|
|
- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[CONVFMT]), v->number, TRUE);
|
||
|
|
+ fmt_num(getvar_s(intvar[CONVFMT]), v->number);
|
||
|
|
v->string = xstrdup(g_buf);
|
||
|
|
v->type |= VF_CACHED;
|
||
|
|
}
|
||
|
|
@@ -2315,12 +2313,9 @@ static int awk_getline(rstream *rsm, var *v)
|
||
|
|
#endif
|
||
|
|
static char *awk_printf(node *n, int *len)
|
||
|
|
{
|
||
|
|
- char *b = NULL;
|
||
|
|
- char *fmt, *s, *f;
|
||
|
|
- const char *s1;
|
||
|
|
- int i, j, incr, bsize;
|
||
|
|
- char c, c1;
|
||
|
|
- var *arg;
|
||
|
|
+ char *b;
|
||
|
|
+ char *fmt, *f;
|
||
|
|
+ int i;
|
||
|
|
|
||
|
|
//tmpvar = nvalloc(1);
|
||
|
|
#define TMPVAR (&G.awk_printf__tmpvar)
|
||
|
|
@@ -2333,8 +2328,14 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
// to evaluate() potentially recursing into another awk_printf() can't
|
||
|
|
// mangle the value.
|
||
|
|
|
||
|
|
+ b = NULL;
|
||
|
|
i = 0;
|
||
|
|
- while (*f) {
|
||
|
|
+ while (*f) { /* "print one format spec" loop */
|
||
|
|
+ char *s;
|
||
|
|
+ char c;
|
||
|
|
+ char sv;
|
||
|
|
+ var *arg;
|
||
|
|
+
|
||
|
|
s = f;
|
||
|
|
while (*f && (*f != '%' || *++f == '%'))
|
||
|
|
f++;
|
||
|
|
@@ -2343,40 +2344,55 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
syntax_error("%*x formats are not supported");
|
||
|
|
f++;
|
||
|
|
}
|
||
|
|
-
|
||
|
|
- incr = (f - s) + MAXVARFMT;
|
||
|
|
- b = qrealloc(b, incr + i, &bsize);
|
||
|
|
c = *f;
|
||
|
|
- if (c != '\0')
|
||
|
|
- f++;
|
||
|
|
- c1 = *f;
|
||
|
|
+ if (!c) {
|
||
|
|
+ /* Tail of fmt with no percent chars,
|
||
|
|
+ * or "....%" (percent seen, but no format specifier char found)
|
||
|
|
+ */
|
||
|
|
+ goto tail;
|
||
|
|
+ }
|
||
|
|
+ sv = *++f;
|
||
|
|
*f = '\0';
|
||
|
|
arg = evaluate(nextarg(&n), TMPVAR);
|
||
|
|
|
||
|
|
- j = i;
|
||
|
|
- if (c == 'c' || !c) {
|
||
|
|
- i += sprintf(b+i, s, is_numeric(arg) ?
|
||
|
|
+ /* Result can be arbitrarily long. Example:
|
||
|
|
+ * printf "%99999s", "BOOM"
|
||
|
|
+ */
|
||
|
|
+ if (c == 'c') {
|
||
|
|
+ s = xasprintf(s, is_numeric(arg) ?
|
||
|
|
(char)getvar_i(arg) : *getvar_s(arg));
|
||
|
|
} else if (c == 's') {
|
||
|
|
- s1 = getvar_s(arg);
|
||
|
|
- b = qrealloc(b, incr+i+strlen(s1), &bsize);
|
||
|
|
- i += sprintf(b+i, s, s1);
|
||
|
|
+ s = xasprintf(s, getvar_s(arg));
|
||
|
|
} else {
|
||
|
|
- i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
|
||
|
|
+ double d = getvar_i(arg);
|
||
|
|
+ if (strchr("diouxX", c)) {
|
||
|
|
+//TODO: make it wider here (%x -> %llx etc)?
|
||
|
|
+ s = xasprintf(s, (int)d);
|
||
|
|
+ } else if (strchr("eEfFgGaA", c)) {
|
||
|
|
+ s = xasprintf(s, d);
|
||
|
|
+ } else {
|
||
|
|
+ syntax_error(EMSG_INV_FMT);
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
- *f = c1;
|
||
|
|
+ *f = sv;
|
||
|
|
|
||
|
|
- /* if there was an error while sprintf, return value is negative */
|
||
|
|
- if (i < j)
|
||
|
|
- i = j;
|
||
|
|
+ if (i == 0) {
|
||
|
|
+ b = s;
|
||
|
|
+ i = strlen(b);
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ tail:
|
||
|
|
+ b = xrealloc(b, i + strlen(s) + 1);
|
||
|
|
+ i = stpcpy(b + i, s) - b;
|
||
|
|
+ if (!c) /* tail? */
|
||
|
|
+ break;
|
||
|
|
+ free(s);
|
||
|
|
}
|
||
|
|
|
||
|
|
free(fmt);
|
||
|
|
//nvfree(tmpvar, 1);
|
||
|
|
#undef TMPVAR
|
||
|
|
|
||
|
|
- b = xrealloc(b, i + 1);
|
||
|
|
- b[i] = '\0';
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
if (len)
|
||
|
|
*len = i;
|
||
|
|
@@ -2936,8 +2952,8 @@ static var *evaluate(node *op, var *res)
|
||
|
|
for (;;) {
|
||
|
|
var *v = evaluate(nextarg(&op1), TMPVAR0);
|
||
|
|
if (v->type & VF_NUMBER) {
|
||
|
|
- fmt_num(g_buf, MAXVARFMT, getvar_s(intvar[OFMT]),
|
||
|
|
- getvar_i(v), TRUE);
|
||
|
|
+ fmt_num(getvar_s(intvar[OFMT]),
|
||
|
|
+ getvar_i(v));
|
||
|
|
fputs(g_buf, F);
|
||
|
|
} else {
|
||
|
|
fputs(getvar_s(v), F);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 9c55f6ae3f528a3416368e0aff9942d5b4ed216d Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 11 Jul 2021 11:46:21 +0200
|
||
|
|
Subject: [PATCH 52/61] awk: rollback_token() + chain_group() ==
|
||
|
|
chain_until_rbrace()
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_program 336 332 -4
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 3 +--
|
||
|
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index a440a6234..755e68fc7 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1778,8 +1778,7 @@ static void parse_program(char *p)
|
||
|
|
cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
|
||
|
|
if (t_tclass == TC_LBRACE) {
|
||
|
|
debug_printf_parse("%s: TC_LBRACE\n", __func__);
|
||
|
|
- rollback_token();
|
||
|
|
- chain_group();
|
||
|
|
+ chain_until_rbrace();
|
||
|
|
} else {
|
||
|
|
/* no action, assume default "{ print }" */
|
||
|
|
debug_printf_parse("%s: !TC_LBRACE\n", __func__);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From bd0d2c3b5bf5c9337e67b43222bafcdf80c4e36a Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 11 Jul 2021 12:00:31 +0200
|
||
|
|
Subject: [PATCH 53/61] awk: undo TI_PRINT, it introduced a bug (print with any
|
||
|
|
redirect acting as printf)
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3329 3337 +8
|
||
|
|
|
||
|
|
Patch by Ron Yorston <rmy@pobox.com>
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 8 +++++---
|
||
|
|
testsuite/awk.tests | 5 +++++
|
||
|
|
2 files changed, 10 insertions(+), 3 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 755e68fc7..0aa7c0804 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -462,8 +462,7 @@ static const uint32_t tokeninfo[] ALIGN4 = {
|
||
|
|
0,
|
||
|
|
0, /* \n */
|
||
|
|
ST_IF, ST_DO, ST_FOR, OC_BREAK,
|
||
|
|
-#define TI_PRINT OC_PRINT
|
||
|
|
- OC_CONTINUE, OC_DELETE|Rx, TI_PRINT,
|
||
|
|
+ OC_CONTINUE, OC_DELETE|Rx, OC_PRINT,
|
||
|
|
OC_PRINTF, OC_NEXT, OC_NEXTFILE,
|
||
|
|
OC_RETURN|Vx, OC_EXIT|Nx,
|
||
|
|
ST_WHILE,
|
||
|
|
@@ -2944,7 +2943,10 @@ static var *evaluate(node *op, var *res)
|
||
|
|
F = rsm->F;
|
||
|
|
}
|
||
|
|
|
||
|
|
- if (opinfo == TI_PRINT) {
|
||
|
|
+ /* Can't just check 'opinfo == OC_PRINT' here, parser ORs
|
||
|
|
+ * additional bits to opinfos of print/printf with redirects
|
||
|
|
+ */
|
||
|
|
+ if ((opinfo & OPCLSMASK) == OC_PRINT) {
|
||
|
|
if (!op1) {
|
||
|
|
fputs(getvar_s(intvar[F0]), F);
|
||
|
|
} else {
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 770d8ffce..6b23b91cb 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -450,4 +450,9 @@ testing "awk exit N propagates through END's exit" \
|
||
|
|
"42\n" \
|
||
|
|
'' ''
|
||
|
|
|
||
|
|
+testing "awk print + redirect" \
|
||
|
|
+ "awk 'BEGIN { print \"STDERR %s\" >\"/dev/stderr\" }' 2>&1" \
|
||
|
|
+ "STDERR %s\n" \
|
||
|
|
+ '' ''
|
||
|
|
+
|
||
|
|
exit $FAILCOUNT
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 5ed199c07d9ffc947443118dda0e0af6569588d5 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 11 Jul 2021 12:25:33 +0200
|
||
|
|
Subject: [PATCH 54/61] awk: unbreak "printf('%c') can output NUL" testcase
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_printf 546 593 +47
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 43 ++++++++++++++++++++++++++-----------------
|
||
|
|
1 file changed, 26 insertions(+), 17 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 0aa7c0804..e765d3fcf 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2309,11 +2309,11 @@ static int awk_getline(rstream *rsm, var *v)
|
||
|
|
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
# define awk_printf(a, b) awk_printf(a)
|
||
|
|
#endif
|
||
|
|
-static char *awk_printf(node *n, int *len)
|
||
|
|
+static char *awk_printf(node *n, size_t *len)
|
||
|
|
{
|
||
|
|
char *b;
|
||
|
|
char *fmt, *f;
|
||
|
|
- int i;
|
||
|
|
+ size_t i;
|
||
|
|
|
||
|
|
//tmpvar = nvalloc(1);
|
||
|
|
#define TMPVAR (&G.awk_printf__tmpvar)
|
||
|
|
@@ -2333,6 +2333,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
char c;
|
||
|
|
char sv;
|
||
|
|
var *arg;
|
||
|
|
+ size_t slen;
|
||
|
|
|
||
|
|
s = f;
|
||
|
|
while (*f && (*f != '%' || *++f == '%'))
|
||
|
|
@@ -2347,6 +2348,7 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
/* Tail of fmt with no percent chars,
|
||
|
|
* or "....%" (percent seen, but no format specifier char found)
|
||
|
|
*/
|
||
|
|
+ slen = strlen(s);
|
||
|
|
goto tail;
|
||
|
|
}
|
||
|
|
sv = *++f;
|
||
|
|
@@ -2357,31 +2359,38 @@ static char *awk_printf(node *n, int *len)
|
||
|
|
* printf "%99999s", "BOOM"
|
||
|
|
*/
|
||
|
|
if (c == 'c') {
|
||
|
|
- s = xasprintf(s, is_numeric(arg) ?
|
||
|
|
- (char)getvar_i(arg) : *getvar_s(arg));
|
||
|
|
- } else if (c == 's') {
|
||
|
|
- s = xasprintf(s, getvar_s(arg));
|
||
|
|
+ c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
|
||
|
|
+ s = xasprintf(s, c);
|
||
|
|
+ /* + 1 if c == NUL: handle printf "%c" 0 case
|
||
|
|
+ * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
|
||
|
|
+ slen = strlen(s) + (c == '\0');
|
||
|
|
} else {
|
||
|
|
- double d = getvar_i(arg);
|
||
|
|
- if (strchr("diouxX", c)) {
|
||
|
|
-//TODO: make it wider here (%x -> %llx etc)?
|
||
|
|
- s = xasprintf(s, (int)d);
|
||
|
|
- } else if (strchr("eEfFgGaA", c)) {
|
||
|
|
- s = xasprintf(s, d);
|
||
|
|
+ if (c == 's') {
|
||
|
|
+ s = xasprintf(s, getvar_s(arg));
|
||
|
|
} else {
|
||
|
|
- syntax_error(EMSG_INV_FMT);
|
||
|
|
+ double d = getvar_i(arg);
|
||
|
|
+ if (strchr("diouxX", c)) {
|
||
|
|
+//TODO: make it wider here (%x -> %llx etc)?
|
||
|
|
+ s = xasprintf(s, (int)d);
|
||
|
|
+ } else if (strchr("eEfFgGaA", c)) {
|
||
|
|
+ s = xasprintf(s, d);
|
||
|
|
+ } else {
|
||
|
|
+ syntax_error(EMSG_INV_FMT);
|
||
|
|
+ }
|
||
|
|
}
|
||
|
|
+ slen = strlen(s);
|
||
|
|
}
|
||
|
|
*f = sv;
|
||
|
|
|
||
|
|
if (i == 0) {
|
||
|
|
b = s;
|
||
|
|
- i = strlen(b);
|
||
|
|
+ i = slen;
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
tail:
|
||
|
|
- b = xrealloc(b, i + strlen(s) + 1);
|
||
|
|
- i = stpcpy(b + i, s) - b;
|
||
|
|
+ b = xrealloc(b, i + slen + 1);
|
||
|
|
+ strcpy(b + i, s);
|
||
|
|
+ i += slen;
|
||
|
|
if (!c) /* tail? */
|
||
|
|
break;
|
||
|
|
free(s);
|
||
|
|
@@ -2926,7 +2935,6 @@ static var *evaluate(node *op, var *res)
|
||
|
|
debug_printf_eval("PRINTF\n");
|
||
|
|
{
|
||
|
|
FILE *F = stdout;
|
||
|
|
- IF_FEATURE_AWK_GNU_EXTENSIONS(int len;)
|
||
|
|
|
||
|
|
if (op->r.n) {
|
||
|
|
rstream *rsm = newfile(R.s);
|
||
|
|
@@ -2966,6 +2974,7 @@ static var *evaluate(node *op, var *res)
|
||
|
|
}
|
||
|
|
fputs(getvar_s(intvar[ORS]), F);
|
||
|
|
} else { /* PRINTF */
|
||
|
|
+ IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
|
||
|
|
char *s = awk_printf(op1, &len);
|
||
|
|
#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
fwrite(s, len, 1, F);
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From f38b2d9bcddd00432150567bef8f8a2bf0d1ed43 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 11 Jul 2021 12:51:43 +0200
|
||
|
|
Subject: [PATCH 55/61] awk: unbreak "cmd" | getline
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
evaluate 3337 3343 +6
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 3 ++-
|
||
|
|
testsuite/awk.tests | 5 +++++
|
||
|
|
2 files changed, 7 insertions(+), 1 deletion(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index e765d3fcf..6c60a0615 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -3156,7 +3156,8 @@ static var *evaluate(node *op, var *res)
|
||
|
|
if (op1) {
|
||
|
|
rsm = newfile(L.s);
|
||
|
|
if (!rsm->F) {
|
||
|
|
- if (opinfo == TI_PGETLINE) {
|
||
|
|
+ /* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
|
||
|
|
+ if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
|
||
|
|
rsm->F = popen(L.s, "r");
|
||
|
|
rsm->is_pipe = TRUE;
|
||
|
|
} else {
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 6b23b91cb..242c897d1 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -455,4 +455,9 @@ testing "awk print + redirect" \
|
||
|
|
"STDERR %s\n" \
|
||
|
|
'' ''
|
||
|
|
|
||
|
|
+testing "awk \"cmd\" | getline" \
|
||
|
|
+ "awk 'BEGIN { \"echo HELLO\" | getline; print }'" \
|
||
|
|
+ "HELLO\n" \
|
||
|
|
+ '' ''
|
||
|
|
+
|
||
|
|
exit $FAILCOUNT
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 3a759a81580a1f7d9b4428e30c623324ec2e3699 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Sun, 11 Jul 2021 18:16:10 +0200
|
||
|
|
Subject: [PATCH 56/61] awk: fix corner case in awk_printf
|
||
|
|
|
||
|
|
Example where it wasn't working:
|
||
|
|
awk 'BEGIN { printf "qwe %s rty %c uio\n", "a", 0, "c" }'
|
||
|
|
- the NUL printing in %c caused premature stop of printing.
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_printf 593 596 +3
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 8 ++++----
|
||
|
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 6c60a0615..465033f5f 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2359,11 +2359,11 @@ static char *awk_printf(node *n, size_t *len)
|
||
|
|
* printf "%99999s", "BOOM"
|
||
|
|
*/
|
||
|
|
if (c == 'c') {
|
||
|
|
- c = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
|
||
|
|
- s = xasprintf(s, c);
|
||
|
|
- /* + 1 if c == NUL: handle printf "%c" 0 case
|
||
|
|
+ char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
|
||
|
|
+ s = xasprintf(s, cc);
|
||
|
|
+ /* + 1 if cc == NUL: handle printf "%c" 0 case
|
||
|
|
* (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
|
||
|
|
- slen = strlen(s) + (c == '\0');
|
||
|
|
+ slen = strlen(s) + (cc == '\0');
|
||
|
|
} else {
|
||
|
|
if (c == 's') {
|
||
|
|
s = xasprintf(s, getvar_s(arg));
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From e62366d32f13e059266e2996a68be023bef309ef Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Mon, 12 Jul 2021 11:27:11 +0200
|
||
|
|
Subject: [PATCH 57/61] awk: fix printf "%-10c", 0
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
awk_printf 596 626 +30
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 9 +++++----
|
||
|
|
testsuite/awk.tests | 8 ++++++++
|
||
|
|
2 files changed, 13 insertions(+), 4 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 465033f5f..437d87ecf 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -2360,10 +2360,11 @@ static char *awk_printf(node *n, size_t *len)
|
||
|
|
*/
|
||
|
|
if (c == 'c') {
|
||
|
|
char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
|
||
|
|
- s = xasprintf(s, cc);
|
||
|
|
- /* + 1 if cc == NUL: handle printf "%c" 0 case
|
||
|
|
- * (and printf "%22c" 0 etc, but still fails for e.g. printf "%-22c" 0) */
|
||
|
|
- slen = strlen(s) + (cc == '\0');
|
||
|
|
+ char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
|
||
|
|
+ slen = strlen(r);
|
||
|
|
+ if (cc == '\0') /* if cc is NUL, re-format the string with it */
|
||
|
|
+ sprintf(r, s, cc);
|
||
|
|
+ s = r;
|
||
|
|
} else {
|
||
|
|
if (c == 's') {
|
||
|
|
s = xasprintf(s, getvar_s(arg));
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 242c897d1..3cddb4dd4 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -415,6 +415,14 @@ testing "awk printf('%c') can output NUL" \
|
||
|
|
"awk '{printf(\"hello%c null\n\", 0)}'" "hello\0 null\n" "" "\n"
|
||
|
|
SKIP=
|
||
|
|
|
||
|
|
+optional FEATURE_AWK_GNU_EXTENSIONS
|
||
|
|
+testing "awk printf('%-10c') can output NUL" \
|
||
|
|
+ "awk 'BEGIN { printf \"[%-10c]\n\", 0 }' | od -tx1" "\
|
||
|
|
+0000000 5b 00 20 20 20 20 20 20 20 20 20 5d 0a
|
||
|
|
+0000015
|
||
|
|
+" "" ""
|
||
|
|
+SKIP=
|
||
|
|
+
|
||
|
|
# testing "description" "command" "result" "infile" "stdin"
|
||
|
|
testing 'awk negative field access' \
|
||
|
|
'awk 2>&1 -- '\''{ $(-1) }'\' \
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 258057e67d4403d43f48788fabdf874c1bb59502 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Mon, 12 Jul 2021 13:30:30 +0200
|
||
|
|
Subject: [PATCH 58/61] awk: in parsing, remove superfluous NEWLINE check;
|
||
|
|
optimize builtin arg evaluation
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
exec_builtin 1149 1145 -4
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 11 ++++++-----
|
||
|
|
1 file changed, 6 insertions(+), 5 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 437d87ecf..7a282356d 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1589,8 +1589,8 @@ static void chain_group(void)
|
||
|
|
chain_until_rbrace();
|
||
|
|
return;
|
||
|
|
}
|
||
|
|
- if (tc & (TS_OPSEQ | TC_SEMICOL | TC_NEWLINE)) {
|
||
|
|
- debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL | TC_NEWLINE\n", __func__);
|
||
|
|
+ if (tc & (TS_OPSEQ | TC_SEMICOL)) {
|
||
|
|
+ debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
|
||
|
|
rollback_token();
|
||
|
|
chain_expr(OC_EXEC | Vx);
|
||
|
|
return;
|
||
|
|
@@ -2582,10 +2582,11 @@ static NOINLINE var *exec_builtin(node *op, var *res)
|
||
|
|
av[2] = av[3] = NULL;
|
||
|
|
for (i = 0; i < 4 && op; i++) {
|
||
|
|
an[i] = nextarg(&op);
|
||
|
|
- if (isr & 0x09000000)
|
||
|
|
+ if (isr & 0x09000000) {
|
||
|
|
av[i] = evaluate(an[i], TMPVAR(i));
|
||
|
|
- if (isr & 0x08000000)
|
||
|
|
- as[i] = getvar_s(av[i]);
|
||
|
|
+ if (isr & 0x08000000)
|
||
|
|
+ as[i] = getvar_s(av[i]);
|
||
|
|
+ }
|
||
|
|
isr >>= 1;
|
||
|
|
}
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 18fe636700ac5d795027d920922340410f65640e Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 14 Jul 2021 14:25:07 +0200
|
||
|
|
Subject: [PATCH 59/61] awk: tighten parsing - disallow extra semicolons
|
||
|
|
|
||
|
|
'; BEGIN {...}' and 'BEGIN {...} ;; {...}' are not accepted by gawk
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
parse_program 332 353 +21
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 40 ++++++++++++++++++++++++----------------
|
||
|
|
1 file changed, 24 insertions(+), 16 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 7a282356d..2f8a18c8e 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1634,7 +1634,7 @@ static void chain_group(void)
|
||
|
|
debug_printf_parse("%s: ST_FOR\n", __func__);
|
||
|
|
next_token(TC_LPAREN);
|
||
|
|
n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
|
||
|
|
- if (t_tclass & TC_RPAREN) { /* for-in */
|
||
|
|
+ if (t_tclass & TC_RPAREN) { /* for (I in ARRAY) */
|
||
|
|
if (!n2 || n2->info != TI_IN)
|
||
|
|
syntax_error(EMSG_UNEXP_TOKEN);
|
||
|
|
n = chain_node(OC_WALKINIT | VV);
|
||
|
|
@@ -1700,20 +1700,15 @@ static void parse_program(char *p)
|
||
|
|
for (;;) {
|
||
|
|
uint32_t tclass;
|
||
|
|
|
||
|
|
- tclass = next_token(TC_EOF | TS_OPSEQ | TC_LBRACE |
|
||
|
|
- TC_SEMICOL | TC_NEWLINE | TC_BEGIN | TC_END | TC_FUNCDECL);
|
||
|
|
-
|
||
|
|
+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
|
||
|
|
+ | TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
|
||
|
|
+ got_tok:
|
||
|
|
if (tclass == TC_EOF) {
|
||
|
|
debug_printf_parse("%s: TC_EOF\n", __func__);
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
- if (tclass & (TC_SEMICOL | TC_NEWLINE)) {
|
||
|
|
- debug_printf_parse("%s: TC_SEMICOL | TC_NEWLINE\n", __func__);
|
||
|
|
-//NB: gawk allows many newlines, but does not allow more than one semicolon:
|
||
|
|
-// BEGIN {...}<newline>;<newline>;
|
||
|
|
-//would complain "each rule must have a pattern or an action part".
|
||
|
|
-//Same message for
|
||
|
|
-// ; BEGIN {...}
|
||
|
|
+ if (tclass == TC_NEWLINE) {
|
||
|
|
+ debug_printf_parse("%s: TC_NEWLINE\n", __func__);
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
if (tclass == TC_BEGIN) {
|
||
|
|
@@ -1722,7 +1717,7 @@ static void parse_program(char *p)
|
||
|
|
/* ensure there is no newline between BEGIN and { */
|
||
|
|
next_token(TC_LBRACE);
|
||
|
|
chain_until_rbrace();
|
||
|
|
- continue;
|
||
|
|
+ goto next_tok;
|
||
|
|
}
|
||
|
|
if (tclass == TC_END) {
|
||
|
|
debug_printf_parse("%s: TC_END\n", __func__);
|
||
|
|
@@ -1730,7 +1725,7 @@ static void parse_program(char *p)
|
||
|
|
/* ensure there is no newline between END and { */
|
||
|
|
next_token(TC_LBRACE);
|
||
|
|
chain_until_rbrace();
|
||
|
|
- continue;
|
||
|
|
+ goto next_tok;
|
||
|
|
}
|
||
|
|
if (tclass == TC_FUNCDECL) {
|
||
|
|
func *f;
|
||
|
|
@@ -1765,7 +1760,7 @@ static void parse_program(char *p)
|
||
|
|
continue;
|
||
|
|
chain_until_rbrace();
|
||
|
|
hash_clear(ahash);
|
||
|
|
- continue;
|
||
|
|
+ goto next_tok;
|
||
|
|
}
|
||
|
|
seq = &mainseq;
|
||
|
|
if (tclass & TS_OPSEQ) {
|
||
|
|
@@ -1784,12 +1779,25 @@ static void parse_program(char *p)
|
||
|
|
chain_node(OC_PRINT);
|
||
|
|
}
|
||
|
|
cn->r.n = mainseq.last;
|
||
|
|
- continue;
|
||
|
|
+ goto next_tok;
|
||
|
|
}
|
||
|
|
/* tclass == TC_LBRACE */
|
||
|
|
debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
|
||
|
|
chain_until_rbrace();
|
||
|
|
- }
|
||
|
|
+ next_tok:
|
||
|
|
+ /* Same as next_token() at the top of the loop, + TC_SEMICOL */
|
||
|
|
+ tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
|
||
|
|
+ | TC_EOF | TC_NEWLINE | TC_SEMICOL);
|
||
|
|
+ /* gawk allows many newlines, but does not allow more than one semicolon:
|
||
|
|
+ * BEGIN {...}<newline>;<newline>;
|
||
|
|
+ * would complain "each rule must have a pattern or an action part".
|
||
|
|
+ * Same message for
|
||
|
|
+ * ; BEGIN {...}
|
||
|
|
+ */
|
||
|
|
+ if (tclass != TC_SEMICOL)
|
||
|
|
+ goto got_tok; /* use this token */
|
||
|
|
+ /* else: loop back - ate the semicolon, get and use _next_ token */
|
||
|
|
+ } /* for (;;) */
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 9b502f61277aa48a412dd1a18e7a30b5d4c3d71a Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 14 Jul 2021 14:33:37 +0200
|
||
|
|
Subject: [PATCH 60/61] awk: disallow break/continue outside of loops
|
||
|
|
|
||
|
|
function old new delta
|
||
|
|
.rodata 104139 104186 +47
|
||
|
|
chain_group 610 633 +23
|
||
|
|
------------------------------------------------------------------------------
|
||
|
|
(add/remove: 0/0 grow/shrink: 2/0 up/down: 70/0) Total: 70 bytes
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 6 ++++--
|
||
|
|
testsuite/awk.tests | 9 ++-------
|
||
|
|
2 files changed, 6 insertions(+), 9 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 2f8a18c8e..607d69487 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -1671,16 +1671,18 @@ static void chain_group(void)
|
||
|
|
case OC_BREAK:
|
||
|
|
debug_printf_parse("%s: OC_BREAK\n", __func__);
|
||
|
|
n = chain_node(OC_EXEC);
|
||
|
|
+ if (!break_ptr)
|
||
|
|
+ syntax_error("'break' not in a loop");
|
||
|
|
n->a.n = break_ptr;
|
||
|
|
-//TODO: if break_ptr is NULL, syntax error (not in the loop)?
|
||
|
|
chain_expr(t_info);
|
||
|
|
break;
|
||
|
|
|
||
|
|
case OC_CONTINUE:
|
||
|
|
debug_printf_parse("%s: OC_CONTINUE\n", __func__);
|
||
|
|
n = chain_node(OC_EXEC);
|
||
|
|
+ if (!continue_ptr)
|
||
|
|
+ syntax_error("'continue' not in a loop");
|
||
|
|
n->a.n = continue_ptr;
|
||
|
|
-//TODO: if continue_ptr is NULL, syntax error (not in the loop)?
|
||
|
|
chain_expr(t_info);
|
||
|
|
break;
|
||
|
|
|
||
|
|
diff --git a/testsuite/awk.tests b/testsuite/awk.tests
|
||
|
|
index 3cddb4dd4..f53b1efe2 100755
|
||
|
|
--- a/testsuite/awk.tests
|
||
|
|
+++ b/testsuite/awk.tests
|
||
|
|
@@ -379,19 +379,14 @@ testing "awk -e and ARGC" \
|
||
|
|
""
|
||
|
|
SKIP=
|
||
|
|
|
||
|
|
-# The examples are in fact not valid awk programs (break/continue
|
||
|
|
-# can only be used inside loops).
|
||
|
|
-# But we do accept them outside of loops.
|
||
|
|
-# We had a bug with misparsing "break ; else" sequence.
|
||
|
|
-# Test that *that* bug is fixed, using simplest possible scripts:
|
||
|
|
testing "awk break" \
|
||
|
|
"awk -f - 2>&1; echo \$?" \
|
||
|
|
- "0\n" \
|
||
|
|
+ "awk: -:1: 'break' not in a loop\n1\n" \
|
||
|
|
"" \
|
||
|
|
'BEGIN { if (1) break; else a = 1 }'
|
||
|
|
testing "awk continue" \
|
||
|
|
"awk -f - 2>&1; echo \$?" \
|
||
|
|
- "0\n" \
|
||
|
|
+ "awk: -:1: 'continue' not in a loop\n1\n" \
|
||
|
|
"" \
|
||
|
|
'BEGIN { if (1) continue; else a = 1 }'
|
||
|
|
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|
||
|
|
|
||
|
|
From 027b43ab6700b85f037fb69c08ad052cff6a7384 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
Date: Wed, 14 Jul 2021 16:58:05 +0200
|
||
|
|
Subject: [PATCH 61/61] awk: whitespace and debugging tweaks
|
||
|
|
|
||
|
|
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
|
||
|
|
---
|
||
|
|
editors/awk.c | 133 +++++++++++++++++++++++++-------------------------
|
||
|
|
1 file changed, 66 insertions(+), 67 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/editors/awk.c b/editors/awk.c
|
||
|
|
index 607d69487..3adbca7aa 100644
|
||
|
|
--- a/editors/awk.c
|
||
|
|
+++ b/editors/awk.c
|
||
|
|
@@ -199,77 +199,78 @@ typedef struct tsplitter_s {
|
||
|
|
|
||
|
|
/* simple token classes */
|
||
|
|
/* order and hex values are very important!!! See next_token() */
|
||
|
|
-#define TC_LPAREN (1 << 0) /* ( */
|
||
|
|
-#define TC_RPAREN (1 << 1) /* ) */
|
||
|
|
-#define TC_REGEXP (1 << 2) /* /.../ */
|
||
|
|
-#define TC_OUTRDR (1 << 3) /* | > >> */
|
||
|
|
-#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
|
||
|
|
-#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
|
||
|
|
-#define TC_BINOPX (1 << 6) /* two-opnd operator */
|
||
|
|
-#define TC_IN (1 << 7) /* 'in' */
|
||
|
|
-#define TC_COMMA (1 << 8) /* , */
|
||
|
|
-#define TC_PIPE (1 << 9) /* input redirection pipe | */
|
||
|
|
-#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
|
||
|
|
-#define TC_ARRTERM (1 << 11) /* ] */
|
||
|
|
-#define TC_LBRACE (1 << 12) /* { */
|
||
|
|
-#define TC_RBRACE (1 << 13) /* } */
|
||
|
|
-#define TC_SEMICOL (1 << 14) /* ; */
|
||
|
|
-#define TC_NEWLINE (1 << 15)
|
||
|
|
-#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
|
||
|
|
-#define TC_WHILE (1 << 17) /* 'while' */
|
||
|
|
-#define TC_ELSE (1 << 18) /* 'else' */
|
||
|
|
-#define TC_BUILTIN (1 << 19)
|
||
|
|
+#define TC_LPAREN (1 << 0) /* ( */
|
||
|
|
+#define TC_RPAREN (1 << 1) /* ) */
|
||
|
|
+#define TC_REGEXP (1 << 2) /* /.../ */
|
||
|
|
+#define TC_OUTRDR (1 << 3) /* | > >> */
|
||
|
|
+#define TC_UOPPOST (1 << 4) /* unary postfix operator ++ -- */
|
||
|
|
+#define TC_UOPPRE1 (1 << 5) /* unary prefix operator ++ -- $ */
|
||
|
|
+#define TC_BINOPX (1 << 6) /* two-opnd operator */
|
||
|
|
+#define TC_IN (1 << 7) /* 'in' */
|
||
|
|
+#define TC_COMMA (1 << 8) /* , */
|
||
|
|
+#define TC_PIPE (1 << 9) /* input redirection pipe | */
|
||
|
|
+#define TC_UOPPRE2 (1 << 10) /* unary prefix operator + - ! */
|
||
|
|
+#define TC_ARRTERM (1 << 11) /* ] */
|
||
|
|
+#define TC_LBRACE (1 << 12) /* { */
|
||
|
|
+#define TC_RBRACE (1 << 13) /* } */
|
||
|
|
+#define TC_SEMICOL (1 << 14) /* ; */
|
||
|
|
+#define TC_NEWLINE (1 << 15)
|
||
|
|
+#define TC_STATX (1 << 16) /* ctl statement (for, next...) */
|
||
|
|
+#define TC_WHILE (1 << 17) /* 'while' */
|
||
|
|
+#define TC_ELSE (1 << 18) /* 'else' */
|
||
|
|
+#define TC_BUILTIN (1 << 19)
|
||
|
|
/* This costs ~50 bytes of code.
|
||
|
|
* A separate class to support deprecated "length" form. If we don't need that
|
||
|
|
* (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
|
||
|
|
* can be merged with TC_BUILTIN:
|
||
|
|
*/
|
||
|
|
-#define TC_LENGTH (1 << 20) /* 'length' */
|
||
|
|
-#define TC_GETLINE (1 << 21) /* 'getline' */
|
||
|
|
-#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
|
||
|
|
-#define TC_BEGIN (1 << 23) /* 'BEGIN' */
|
||
|
|
-#define TC_END (1 << 24) /* 'END' */
|
||
|
|
-#define TC_EOF (1 << 25)
|
||
|
|
-#define TC_VARIABLE (1 << 26) /* name */
|
||
|
|
-#define TC_ARRAY (1 << 27) /* name[ */
|
||
|
|
-#define TC_FUNCTION (1 << 28) /* name( */
|
||
|
|
-#define TC_STRING (1 << 29) /* "..." */
|
||
|
|
-#define TC_NUMBER (1 << 30)
|
||
|
|
+#define TC_LENGTH (1 << 20) /* 'length' */
|
||
|
|
+#define TC_GETLINE (1 << 21) /* 'getline' */
|
||
|
|
+#define TC_FUNCDECL (1 << 22) /* 'function' 'func' */
|
||
|
|
+#define TC_BEGIN (1 << 23) /* 'BEGIN' */
|
||
|
|
+#define TC_END (1 << 24) /* 'END' */
|
||
|
|
+#define TC_EOF (1 << 25)
|
||
|
|
+#define TC_VARIABLE (1 << 26) /* name */
|
||
|
|
+#define TC_ARRAY (1 << 27) /* name[ */
|
||
|
|
+#define TC_FUNCTION (1 << 28) /* name( */
|
||
|
|
+#define TC_STRING (1 << 29) /* "..." */
|
||
|
|
+#define TC_NUMBER (1 << 30)
|
||
|
|
|
||
|
|
#ifndef debug_parse_print_tc
|
||
|
|
-#define debug_parse_print_tc(n) do { \
|
||
|
|
-if ((n) & TC_LPAREN ) debug_printf_parse(" LPAREN" ); \
|
||
|
|
-if ((n) & TC_RPAREN ) debug_printf_parse(" RPAREN" ); \
|
||
|
|
-if ((n) & TC_REGEXP ) debug_printf_parse(" REGEXP" ); \
|
||
|
|
-if ((n) & TC_OUTRDR ) debug_printf_parse(" OUTRDR" ); \
|
||
|
|
-if ((n) & TC_UOPPOST ) debug_printf_parse(" UOPPOST" ); \
|
||
|
|
-if ((n) & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" ); \
|
||
|
|
-if ((n) & TC_BINOPX ) debug_printf_parse(" BINOPX" ); \
|
||
|
|
-if ((n) & TC_IN ) debug_printf_parse(" IN" ); \
|
||
|
|
-if ((n) & TC_COMMA ) debug_printf_parse(" COMMA" ); \
|
||
|
|
-if ((n) & TC_PIPE ) debug_printf_parse(" PIPE" ); \
|
||
|
|
-if ((n) & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" ); \
|
||
|
|
-if ((n) & TC_ARRTERM ) debug_printf_parse(" ARRTERM" ); \
|
||
|
|
-if ((n) & TC_LBRACE ) debug_printf_parse(" LBRACE" ); \
|
||
|
|
-if ((n) & TC_RBRACE ) debug_printf_parse(" RBRACE" ); \
|
||
|
|
-if ((n) & TC_SEMICOL ) debug_printf_parse(" SEMICOL" ); \
|
||
|
|
-if ((n) & TC_NEWLINE ) debug_printf_parse(" NEWLINE" ); \
|
||
|
|
-if ((n) & TC_STATX ) debug_printf_parse(" STATX" ); \
|
||
|
|
-if ((n) & TC_WHILE ) debug_printf_parse(" WHILE" ); \
|
||
|
|
-if ((n) & TC_ELSE ) debug_printf_parse(" ELSE" ); \
|
||
|
|
-if ((n) & TC_BUILTIN ) debug_printf_parse(" BUILTIN" ); \
|
||
|
|
-if ((n) & TC_LENGTH ) debug_printf_parse(" LENGTH" ); \
|
||
|
|
-if ((n) & TC_GETLINE ) debug_printf_parse(" GETLINE" ); \
|
||
|
|
-if ((n) & TC_FUNCDECL) debug_printf_parse(" FUNCDECL"); \
|
||
|
|
-if ((n) & TC_BEGIN ) debug_printf_parse(" BEGIN" ); \
|
||
|
|
-if ((n) & TC_END ) debug_printf_parse(" END" ); \
|
||
|
|
-if ((n) & TC_EOF ) debug_printf_parse(" EOF" ); \
|
||
|
|
-if ((n) & TC_VARIABLE) debug_printf_parse(" VARIABLE"); \
|
||
|
|
-if ((n) & TC_ARRAY ) debug_printf_parse(" ARRAY" ); \
|
||
|
|
-if ((n) & TC_FUNCTION) debug_printf_parse(" FUNCTION"); \
|
||
|
|
-if ((n) & TC_STRING ) debug_printf_parse(" STRING" ); \
|
||
|
|
-if ((n) & TC_NUMBER ) debug_printf_parse(" NUMBER" ); \
|
||
|
|
-} while (0)
|
||
|
|
+static void debug_parse_print_tc(uint32_t n)
|
||
|
|
+{
|
||
|
|
+ if (n & TC_LPAREN ) debug_printf_parse(" LPAREN" );
|
||
|
|
+ if (n & TC_RPAREN ) debug_printf_parse(" RPAREN" );
|
||
|
|
+ if (n & TC_REGEXP ) debug_printf_parse(" REGEXP" );
|
||
|
|
+ if (n & TC_OUTRDR ) debug_printf_parse(" OUTRDR" );
|
||
|
|
+ if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
|
||
|
|
+ if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
|
||
|
|
+ if (n & TC_BINOPX ) debug_printf_parse(" BINOPX" );
|
||
|
|
+ if (n & TC_IN ) debug_printf_parse(" IN" );
|
||
|
|
+ if (n & TC_COMMA ) debug_printf_parse(" COMMA" );
|
||
|
|
+ if (n & TC_PIPE ) debug_printf_parse(" PIPE" );
|
||
|
|
+ if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
|
||
|
|
+ if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
|
||
|
|
+ if (n & TC_LBRACE ) debug_printf_parse(" LBRACE" );
|
||
|
|
+ if (n & TC_RBRACE ) debug_printf_parse(" RBRACE" );
|
||
|
|
+ if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
|
||
|
|
+ if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
|
||
|
|
+ if (n & TC_STATX ) debug_printf_parse(" STATX" );
|
||
|
|
+ if (n & TC_WHILE ) debug_printf_parse(" WHILE" );
|
||
|
|
+ if (n & TC_ELSE ) debug_printf_parse(" ELSE" );
|
||
|
|
+ if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
|
||
|
|
+ if (n & TC_LENGTH ) debug_printf_parse(" LENGTH" );
|
||
|
|
+ if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
|
||
|
|
+ if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
|
||
|
|
+ if (n & TC_BEGIN ) debug_printf_parse(" BEGIN" );
|
||
|
|
+ if (n & TC_END ) debug_printf_parse(" END" );
|
||
|
|
+ if (n & TC_EOF ) debug_printf_parse(" EOF" );
|
||
|
|
+ if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
|
||
|
|
+ if (n & TC_ARRAY ) debug_printf_parse(" ARRAY" );
|
||
|
|
+ if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
|
||
|
|
+ if (n & TC_STRING ) debug_printf_parse(" STRING" );
|
||
|
|
+ if (n & TC_NUMBER ) debug_printf_parse(" NUMBER" );
|
||
|
|
+}
|
||
|
|
#endif
|
||
|
|
|
||
|
|
/* combined token classes ("token [class] sets") */
|
||
|
|
@@ -417,7 +418,7 @@ static const char tokenlist[] ALIGN1 =
|
||
|
|
"\5close" "\6system" "\6fflush" "\5atan2"
|
||
|
|
"\3cos" "\3exp" "\3int" "\3log"
|
||
|
|
"\4rand" "\3sin" "\4sqrt" "\5srand"
|
||
|
|
- "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
|
||
|
|
+ "\6gensub" "\4gsub" "\5index" /* "\6length" was here */
|
||
|
|
"\5match" "\5split" "\7sprintf" "\3sub"
|
||
|
|
"\6substr" "\7systime" "\10strftime" "\6mktime"
|
||
|
|
"\7tolower" "\7toupper" NTC
|
||
|
|
@@ -1802,7 +1803,6 @@ static void parse_program(char *p)
|
||
|
|
} /* for (;;) */
|
||
|
|
}
|
||
|
|
|
||
|
|
-
|
||
|
|
/* -------- program execution part -------- */
|
||
|
|
|
||
|
|
/* temporary variables allocator */
|
||
|
|
@@ -3510,7 +3510,6 @@ static var *evaluate(node *op, var *res)
|
||
|
|
#undef sreg
|
||
|
|
}
|
||
|
|
|
||
|
|
-
|
||
|
|
/* -------- main & co. -------- */
|
||
|
|
|
||
|
|
static int awk_exit(void)
|
||
|
|
--
|
||
|
|
2.27.0
|
||
|
|
|