From 0ffcae6adfe1be48124429484a39acdf32979549 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:14:04 +0300 Subject: [PATCH 01/15] lib-mail: test-message-parser - Add another test for boundary matching --- src/lib-mail/test-message-parser.c | 71 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index 4f4d684083..529a36d59c 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -613,6 +613,76 @@ static const char input_msg[] = test_end(); } +static void test_message_parser_continuing_mime_boundary_reverse(void) +{ +static const char input_msg[] = +"Content-Type: multipart/mixed; boundary=\"ab\"\n" +"\n" +"--ab\n" +"Content-Type: multipart/mixed; boundary=\"a\"\n" +"\n" +"--a\n" +"Content-Type: text/plain\n" +"\n" +"body\n" +"--ab\n" +"Content-Type: text/html\n" +"\n" +"body2\n"; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser continuing mime boundary reverse"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, 0, 0); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->header_size.lines == 2); + test_assert(parts->header_size.physical_size == 46); + test_assert(parts->header_size.virtual_size == 46+2); + test_assert(parts->body_size.lines == 11); + test_assert(parts->body_size.physical_size == 121); + test_assert(parts->body_size.virtual_size == 121+11); + test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + 
test_assert(parts->children->physical_pos == 51); + test_assert(parts->children->header_size.lines == 2); + test_assert(parts->children->header_size.physical_size == 45); + test_assert(parts->children->header_size.virtual_size == 45+2); + test_assert(parts->children->body_size.lines == 3); + test_assert(parts->children->body_size.physical_size == 34); + test_assert(parts->children->body_size.virtual_size == 34+3); + test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->children->physical_pos == 100); + test_assert(parts->children->children->header_size.lines == 2); + test_assert(parts->children->children->header_size.physical_size == 26); + test_assert(parts->children->children->header_size.virtual_size == 26+2); + test_assert(parts->children->children->body_size.lines == 0); + test_assert(parts->children->children->body_size.physical_size == 4); + test_assert(parts->children->children->body_size.virtual_size == 4); + test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children->next->physical_pos == 136); + test_assert(parts->children->next->header_size.lines == 2); + test_assert(parts->children->next->header_size.physical_size == 25); + test_assert(parts->children->next->header_size.virtual_size == 25+2); + test_assert(parts->children->next->body_size.lines == 1); + test_assert(parts->children->next->body_size.physical_size == 6); + test_assert(parts->children->next->body_size.virtual_size == 6+1); + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + static void test_message_parser_no_eoh(void) { static const char input_msg[] = "a:b\n"; @@ -653,6 +723,7 @@ int main(void) test_message_parser_garbage_suffix_mime_boundary, test_message_parser_continuing_mime_boundary, test_message_parser_continuing_truncated_mime_boundary, + 
test_message_parser_continuing_mime_boundary_reverse, test_message_parser_no_eoh, NULL }; -- 2.11.0 From 6defb282bec6b17bd1c0e2f62a4d365b453c39ef Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:27:14 +0300 Subject: [PATCH 02/15] lib-mail: test-message-parser - Test that children_count is correct --- src/lib-mail/test-message-parser.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index 529a36d59c..c275707265 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -59,6 +59,7 @@ static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) p1->body_size.physical_size != p2->body_size.physical_size || p1->body_size.virtual_size != p2->body_size.virtual_size || p1->body_size.lines != p2->body_size.lines || + p1->children_count != p2->children_count || p1->flags != p2->flags) return FALSE; @@ -195,6 +196,7 @@ static const char input_msg[] = message_parser_deinit(&parser, &parts); test_assert((parts->flags & MESSAGE_PART_FLAG_MULTIPART) != 0); + test_assert(parts->children_count == 4); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 48); test_assert(parts->header_size.virtual_size == 48+2); @@ -218,6 +220,7 @@ static const char input_msg[] = test_assert(parts->children->next->next->next->header_size.virtual_size == 23); test_assert(parts->children->next->next->next->header_size.lines == 0); for (part = parts->children; part != NULL; part = part->next) { + test_assert(part->children_count == 0); test_assert(part->body_size.physical_size == 0); test_assert(part->body_size.virtual_size == 0); } @@ -258,6 +261,7 @@ static const char input_msg[] = message_parser_deinit(&parser, &parts); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(parts->children_count == 2); test_assert(parts->header_size.lines == 2); 
test_assert(parts->header_size.physical_size == 46); test_assert(parts->header_size.virtual_size == 46+2); @@ -265,6 +269,7 @@ static const char input_msg[] = test_assert(parts->body_size.physical_size == 86); test_assert(parts->body_size.virtual_size == 86+8); + test_assert(parts->children->children_count == 0); test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->physical_pos == 51); test_assert(parts->children->header_size.lines == 1); @@ -274,6 +279,7 @@ static const char input_msg[] = test_assert(parts->children->body_size.physical_size == 0); test_assert(parts->children->children == NULL); + test_assert(parts->children->next->children_count == 0); test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->next->physical_pos == 101); test_assert(parts->children->next->header_size.lines == 2); @@ -310,6 +316,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 0); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 1); test_assert(parts->header_size.physical_size == 45); @@ -347,6 +354,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 0); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 46); @@ -391,6 +399,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 2); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 45); @@ -398,6 +407,7 @@ 
static const char input_msg[] = test_assert(parts->body_size.lines == 7); test_assert(parts->body_size.physical_size == 84); test_assert(parts->body_size.virtual_size == 84+7); + test_assert(parts->children->children_count == 1); test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->physical_pos == 49); test_assert(parts->children->header_size.lines == 2); @@ -406,6 +416,7 @@ static const char input_msg[] = test_assert(parts->children->body_size.lines == 4); test_assert(parts->children->body_size.physical_size == 35); test_assert(parts->children->body_size.virtual_size == 35+4); + test_assert(parts->children->children->children_count == 0); test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->children->physical_pos == 98); test_assert(parts->children->children->header_size.lines == 2); @@ -449,6 +460,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 2); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 45); @@ -456,6 +468,7 @@ static const char input_msg[] = test_assert(parts->body_size.lines == 7); test_assert(parts->body_size.physical_size == 86); test_assert(parts->body_size.virtual_size == 86+7); + test_assert(parts->children->children_count == 1); test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->physical_pos == 50); test_assert(parts->children->header_size.lines == 2); @@ -464,6 +477,7 @@ static const char input_msg[] = test_assert(parts->children->body_size.lines == 4); test_assert(parts->children->body_size.physical_size == 36); test_assert(parts->children->body_size.virtual_size == 36+4); + 
test_assert(parts->children->children->children_count == 0); test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->children->physical_pos == 100); test_assert(parts->children->children->header_size.lines == 2); @@ -507,6 +521,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 2); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 45); @@ -514,6 +529,7 @@ static const char input_msg[] = test_assert(parts->body_size.lines == 7); test_assert(parts->body_size.physical_size == 86); test_assert(parts->body_size.virtual_size == 86+7); + test_assert(parts->children->children_count == 1); test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->physical_pos == 49); test_assert(parts->children->header_size.lines == 2); @@ -522,6 +538,7 @@ static const char input_msg[] = test_assert(parts->children->body_size.lines == 4); test_assert(parts->children->body_size.physical_size == 36); test_assert(parts->children->body_size.virtual_size == 36+4); + test_assert(parts->children->children->children_count == 0); test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->children->physical_pos == 100); test_assert(parts->children->children->header_size.lines == 2); @@ -567,6 +584,7 @@ static const char input_msg[] = message_parser_deinit(&parser, &parts); part = parts; + test_assert(part->children_count == 3); test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(part->header_size.lines == 2); test_assert(part->header_size.physical_size == 45); @@ -576,6 +594,7 @@ static const char input_msg[] = 
test_assert(part->body_size.virtual_size == 112+9); part = parts->children; + test_assert(part->children_count == 0); test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(part->physical_pos == 49); test_assert(part->header_size.lines == 1); @@ -589,6 +608,7 @@ static const char input_msg[] = we could make it, but it would complicate the message-parser even more. */ part = parts->children->next; + test_assert(part->children_count == 0); test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(part->physical_pos == 117); test_assert(part->header_size.lines == 1); @@ -599,6 +619,7 @@ static const char input_msg[] = test_assert(part->children == NULL); part = parts->children->next->next; + test_assert(part->children_count == 0); test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(part->header_size.lines == 0); test_assert(part->header_size.physical_size == 0); @@ -645,6 +666,7 @@ static const char input_msg[] = test_assert(ret < 0); message_parser_deinit(&parser, &parts); + test_assert(parts->children_count == 3); test_assert(parts->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->header_size.lines == 2); test_assert(parts->header_size.physical_size == 46); @@ -652,6 +674,7 @@ static const char input_msg[] = test_assert(parts->body_size.lines == 11); test_assert(parts->body_size.physical_size == 121); test_assert(parts->body_size.virtual_size == 121+11); + test_assert(parts->children->children_count == 1); test_assert(parts->children->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->physical_pos == 51); test_assert(parts->children->header_size.lines == 2); @@ -660,6 +683,7 @@ static const char input_msg[] = test_assert(parts->children->body_size.lines == 3); test_assert(parts->children->body_size.physical_size == 34); 
test_assert(parts->children->body_size.virtual_size == 34+3); + test_assert(parts->children->children->children_count == 0); test_assert(parts->children->children->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->children->physical_pos == 100); test_assert(parts->children->children->header_size.lines == 2); @@ -668,6 +692,7 @@ static const char input_msg[] = test_assert(parts->children->children->body_size.lines == 0); test_assert(parts->children->children->body_size.physical_size == 4); test_assert(parts->children->children->body_size.virtual_size == 4); + test_assert(parts->children->next->children_count == 0); test_assert(parts->children->next->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); test_assert(parts->children->next->physical_pos == 136); test_assert(parts->children->next->header_size.lines == 2); -- 2.11.0 From 354d5b43a93b6f2ba80c7fc507b75fb1dc4c8df5 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:47:18 +0300 Subject: [PATCH 03/15] lib-mail: Move message_parser_init_from_parts() handling to its own file This helps to see what code they have in common. 
--- src/lib-mail/Makefile.am | 4 +- src/lib-mail/message-parser-from-parts.c | 366 +++++++++++++++++++++++++++ src/lib-mail/message-parser-private.h | 55 +++++ src/lib-mail/message-parser.c | 410 +------------------------------ 4 files changed, 428 insertions(+), 407 deletions(-) create mode 100644 src/lib-mail/message-parser-from-parts.c create mode 100644 src/lib-mail/message-parser-private.h diff --git a/src/lib-mail/Makefile.am b/src/lib-mail/Makefile.am index 8fe43d69d0..57d9e2b8c4 100644 --- a/src/lib-mail/Makefile.am +++ b/src/lib-mail/Makefile.am @@ -28,6 +28,7 @@ libmail_la_SOURCES = \ message-header-parser.c \ message-id.c \ message-parser.c \ + message-parser-from-parts.c \ message-part.c \ message-part-data.c \ message-part-serialize.c \ @@ -42,7 +43,8 @@ libmail_la_SOURCES = \ rfc822-parser.c noinst_HEADERS = \ - html-entities.h + html-entities.h \ + message-parser-private.h headers = \ istream-attachment-connector.h \ diff --git a/src/lib-mail/message-parser-from-parts.c b/src/lib-mail/message-parser-from-parts.c new file mode 100644 index 0000000000..b23055ab9b --- /dev/null +++ b/src/lib-mail/message-parser-from-parts.c @@ -0,0 +1,366 @@ +/* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "istream.h" +#include "message-parser-private.h" + +static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, + struct message_block *block_r); +static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, + struct message_block *block_r); + +static int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED, + struct message_block *block_r ATTR_UNUSED) +{ + return -1; +} + +static void preparsed_skip_to_next(struct message_parser_ctx *ctx) +{ + ctx->parse_next_block = preparsed_parse_next_header_init; + while (ctx->part != NULL) { + if (ctx->part->next != NULL) { + ctx->part = ctx->part->next; + break; + } + + /* parse epilogue of multipart parent if requested */ + if 
(ctx->part->parent != NULL && + (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && + (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) { + /* check for presence of epilogue */ + uoff_t part_end = ctx->part->physical_pos + + ctx->part->header_size.physical_size + + ctx->part->body_size.physical_size; + uoff_t parent_end = ctx->part->parent->physical_pos + + ctx->part->parent->header_size.physical_size + + ctx->part->parent->body_size.physical_size; + + if (parent_end > part_end) { + ctx->parse_next_block = preparsed_parse_epilogue_init; + break; + } + } + ctx->part = ctx->part->parent; + } + if (ctx->part == NULL) + ctx->parse_next_block = preparsed_parse_eof; +} + +static int preparsed_parse_body_finish(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + i_stream_skip(ctx->input, ctx->skip); + ctx->skip = 0; + + preparsed_skip_to_next(ctx); + return ctx->parse_next_block(ctx, block_r); +} + +static int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + i_stream_skip(ctx->input, ctx->skip); + ctx->skip = 0; + + ctx->parse_next_block = preparsed_parse_next_header_init; + ctx->part = ctx->part->children; + return ctx->parse_next_block(ctx, block_r); +} + +static int preparsed_parse_body_more(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t end_offset = ctx->part->physical_pos + + ctx->part->header_size.physical_size + + ctx->part->body_size.physical_size; + bool full; + int ret; + + if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) + return ret; + + if (ctx->input->v_offset + block_r->size >= end_offset) { + block_r->size = end_offset - ctx->input->v_offset; + ctx->parse_next_block = preparsed_parse_body_finish; + } + ctx->skip = block_r->size; + return 1; +} + +static int preparsed_parse_prologue_more(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t boundary_min_start, end_offset; + const unsigned 
char *cur; + bool full; + int ret; + + i_assert(ctx->part->children != NULL); + end_offset = ctx->part->children->physical_pos; + + if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) + return ret; + + if (ctx->input->v_offset + block_r->size >= end_offset) { + /* we've got the full prologue: clip off the initial boundary */ + block_r->size = end_offset - ctx->input->v_offset; + cur = block_r->data + block_r->size - 1; + + /* [\r]\n--boundary[\r]\n */ + if (block_r->size < 5 || *cur != '\n') { + ctx->broken_reason = "Prologue boundary end not at expected position"; + return -1; + } + + cur--; + if (*cur == '\r') cur--; + + /* find newline just before boundary */ + for (; cur >= block_r->data; cur--) { + if (*cur == '\n') break; + } + + if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') { + ctx->broken_reason = "Prologue boundary beginning not at expected position"; + return -1; + } + + if (cur != block_r->data && cur[-1] == '\r') cur--; + + /* clip boundary */ + block_r->size = cur - block_r->data; + + ctx->parse_next_block = preparsed_parse_prologue_finish; + ctx->skip = block_r->size; + return 1; + } + + /* retain enough data in the stream buffer to contain initial boundary */ + if (end_offset > BOUNDARY_END_MAX_LEN) + boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN; + else + boundary_min_start = 0; + + if (ctx->input->v_offset + block_r->size >= boundary_min_start) { + if (boundary_min_start <= ctx->input->v_offset) + return 0; + block_r->size = boundary_min_start - ctx->input->v_offset; + } + ctx->skip = block_r->size; + return 1; +} + +static int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t end_offset = ctx->part->physical_pos + + ctx->part->header_size.physical_size + + ctx->part->body_size.physical_size; + bool full; + int ret; + + if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) + return ret; + + if (ctx->input->v_offset + block_r->size >= end_offset) { + 
block_r->size = end_offset - ctx->input->v_offset; + ctx->parse_next_block = preparsed_parse_body_finish; + } + ctx->skip = block_r->size; + return 1; +} +/* Check that the input at the current position starts with the epilogue boundary line ([\r]\n--boundary--[\r]\n) and arrange for that line to be skipped. Returns -1 with ctx->broken_reason set when the line is not where the cached part sizes say it should be, passes through a <= 0 result from message_parser_read_more(), and returns 0 both when more input is wanted (ctx->want_count is set) and after the line was accepted (block_r->size is then 0 and ctx->parse_next_block is preparsed_parse_epilogue_more). */ +static int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t end_offset = ctx->part->physical_pos + + ctx->part->header_size.physical_size + + ctx->part->body_size.physical_size; + const unsigned char *data, *cur; + size_t size; + bool full; + int ret; + + if (end_offset - ctx->input->v_offset < 7) { + ctx->broken_reason = "Epilogue position is wrong"; + return -1; + } + + if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) + return ret; + + /* [\r]\n--boundary--[\r]\n */ + if (block_r->size < 7) { + ctx->want_count = 7; + return 0; + } + + data = block_r->data; + size = block_r->size; + cur = data; + + if (*cur == '\r') cur++; + + if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') { /* index from cur, not data, so the second dash is also verified when a CR was skipped; size >= 7 keeps cur[2] in bounds */ + ctx->broken_reason = "Epilogue boundary start not at expected position"; + return -1; + } + + /* find the end of the line */ + cur += 3; + if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) { + if (end_offset < ctx->input->v_offset + size) { + ctx->broken_reason = "Epilogue boundary end not at expected position"; + return -1; + } else if (ctx->input->v_offset + size < end_offset && + size < BOUNDARY_END_MAX_LEN && + !ctx->input->eof && !full) { + ctx->want_count = BOUNDARY_END_MAX_LEN; + return 0; + } + } + + block_r->size = 0; + ctx->parse_next_block = preparsed_parse_epilogue_more; + ctx->skip = cur == NULL ? size : (size_t)(cur - data) + 1; /* cur is NULL when the buffered data holds no newline (e.g. the boundary is the last line and lacks one): skip everything buffered, which the check above guarantees does not pass end_offset, instead of doing arithmetic on a NULL pointer */ + return 0; +} + +static int preparsed_parse_body_init(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t offset = ctx->part->physical_pos + + ctx->part->header_size.physical_size; + + if (offset < ctx->input->v_offset) { + /* header was actually larger than the cached size suggested */ + ctx->broken_reason = "Header larger than its cached size"; + return -1; + } + i_stream_skip(ctx->input, offset - 
ctx->input->v_offset); + + /* multipart messages may begin with --boundary--, which makes them + not have any children. */ + if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || + ctx->part->children == NULL) + ctx->parse_next_block = preparsed_parse_body_more; + else + ctx->parse_next_block = preparsed_parse_prologue_more; + return ctx->parse_next_block(ctx, block_r); +} + +static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + uoff_t offset = ctx->part->physical_pos + + ctx->part->header_size.physical_size + + ctx->part->body_size.physical_size; + + ctx->part = ctx->part->parent; + + if (offset < ctx->input->v_offset) { + /* last child was actually larger than the cached size + suggested */ + ctx->broken_reason = "Part larger than its cached size"; + return -1; + } + i_stream_skip(ctx->input, offset - ctx->input->v_offset); + + ctx->parse_next_block = preparsed_parse_epilogue_boundary; + return ctx->parse_next_block(ctx, block_r); +} + +static int preparsed_parse_finish_header(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + if (ctx->part->children != NULL) { + if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && + (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) + ctx->parse_next_block = preparsed_parse_body_init; + else { + ctx->parse_next_block = preparsed_parse_next_header_init; + ctx->part = ctx->part->children; + } + } else if ((ctx->flags & MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) { + ctx->parse_next_block = preparsed_parse_body_init; + } else { + preparsed_skip_to_next(ctx); + } + return ctx->parse_next_block(ctx, block_r); +} + +static int preparsed_parse_next_header(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + struct message_header_line *hdr; + int ret; + + ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr); + if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) { + ctx->want_count = 
i_stream_get_data_size(ctx->input) + 1; + return ret; + } + + if (hdr != NULL) { + block_r->hdr = hdr; + block_r->size = 0; + return 1; + } + message_parse_header_deinit(&ctx->hdr_parser_ctx); + + ctx->parse_next_block = preparsed_parse_finish_header; + + /* return empty block as end of headers */ + block_r->hdr = NULL; + block_r->size = 0; + + i_assert(ctx->skip == 0); + if (ctx->input->v_offset != ctx->part->physical_pos + + ctx->part->header_size.physical_size) { + ctx->broken_reason = "Cached header size mismatch"; + return -1; + } + return 1; +} + +static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, + struct message_block *block_r) +{ + struct istream *hdr_input; + + i_assert(ctx->hdr_parser_ctx == NULL); + + i_assert(ctx->part->physical_pos >= ctx->input->v_offset); + i_stream_skip(ctx->input, ctx->part->physical_pos - + ctx->input->v_offset); + + /* the header may become truncated by --boundaries. limit the header + stream's size to what it's supposed to be to avoid duplicating (and + keeping in sync!) all the same complicated logic as in + parse_next_header(). 
*/ + hdr_input = i_stream_create_limit(ctx->input, ctx->part->header_size.physical_size); + ctx->hdr_parser_ctx = + message_parse_header_init(hdr_input, NULL, ctx->hdr_flags); + i_stream_unref(&hdr_input); + + ctx->parse_next_block = preparsed_parse_next_header; + return preparsed_parse_next_header(ctx, block_r); +} + +struct message_parser_ctx * +message_parser_init_from_parts(struct message_part *parts, + struct istream *input, + enum message_header_parser_flags hdr_flags, + enum message_parser_flags flags) +{ + struct message_parser_ctx *ctx; + + i_assert(parts != NULL); + + ctx = message_parser_init_int(input, hdr_flags, flags); + ctx->preparsed = TRUE; + ctx->parts = ctx->part = parts; + ctx->parse_next_block = preparsed_parse_next_header_init; + return ctx; +} diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h new file mode 100644 index 0000000000..98a576eda4 --- /dev/null +++ b/src/lib-mail/message-parser-private.h @@ -0,0 +1,55 @@ +#ifndef MESSAGE_PARSER_PRIVATE_H +#define MESSAGE_PARSER_PRIVATE_H + +#include "message-parser.h" + +/* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. + We'll add a bit more just in case. 
*/ +#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) + +struct message_boundary { + struct message_boundary *next; + + struct message_part *part; + const char *boundary; + size_t len; + + bool epilogue_found:1; +}; + +struct message_parser_ctx { + pool_t parser_pool, part_pool; + struct istream *input; + struct message_part *parts, *part; + const char *broken_reason; + + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; + + const char *last_boundary; + struct message_boundary *boundaries; + + size_t skip; + char last_chr; + unsigned int want_count; + + struct message_header_parser_ctx *hdr_parser_ctx; + unsigned int prev_hdr_newline_size; + + int (*parse_next_block)(struct message_parser_ctx *ctx, + struct message_block *block_r); + + bool part_seen_content_type:1; + bool multipart:1; + bool preparsed:1; + bool eof:1; +}; + +struct message_parser_ctx * +message_parser_init_int(struct istream *input, + enum message_header_parser_flags hdr_flags, + enum message_parser_flags flags); +int message_parser_read_more(struct message_parser_ctx *ctx, + struct message_block *block_r, bool *full_r); + +#endif diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index e7a4f4cc31..653f964118 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -6,49 +6,7 @@ #include "istream.h" #include "rfc822-parser.h" #include "rfc2231-parser.h" -#include "message-parser.h" - -/* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. - We'll add a bit more just in case. 
*/ -#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) - -struct message_boundary { - struct message_boundary *next; - - struct message_part *part; - const char *boundary; - size_t len; - - bool epilogue_found:1; -}; - -struct message_parser_ctx { - pool_t parser_pool, part_pool; - struct istream *input; - struct message_part *parts, *part; - const char *broken_reason; - - enum message_header_parser_flags hdr_flags; - enum message_parser_flags flags; - - const char *last_boundary; - struct message_boundary *boundaries; - - size_t skip; - char last_chr; - unsigned int want_count; - - struct message_header_parser_ctx *hdr_parser_ctx; - unsigned int prev_hdr_newline_size; - - int (*parse_next_block)(struct message_parser_ctx *ctx, - struct message_block *block_r); - - bool part_seen_content_type:1; - bool multipart:1; - bool preparsed:1; - bool eof:1; -}; +#include "message-parser-private.h" message_part_header_callback_t *null_message_part_header_callback = NULL; @@ -58,10 +16,6 @@ static int parse_next_body_to_boundary(struct message_parser_ctx *ctx, struct message_block *block_r); static int parse_next_body_to_eof(struct message_parser_ctx *ctx, struct message_block *block_r); -static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, - struct message_block *block_r); -static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, - struct message_block *block_r); static struct message_boundary * boundary_find(struct message_boundary *boundaries, @@ -122,8 +76,8 @@ static void parse_body_add_block(struct message_parser_ctx *ctx, ctx->part->body_size.virtual_size += block->size + missing_cr_count; } -static int message_parser_read_more(struct message_parser_ctx *ctx, - struct message_block *block_r, bool *full_r) +int message_parser_read_more(struct message_parser_ctx *ctx, + struct message_block *block_r, bool *full_r) { int ret; @@ -692,346 +646,7 @@ static int parse_next_header_init(struct message_parser_ctx *ctx, return 
parse_next_header(ctx, block_r); } -static int preparsed_parse_eof(struct message_parser_ctx *ctx ATTR_UNUSED, - struct message_block *block_r ATTR_UNUSED) -{ - return -1; -} - -static void preparsed_skip_to_next(struct message_parser_ctx *ctx) -{ - ctx->parse_next_block = preparsed_parse_next_header_init; - while (ctx->part != NULL) { - if (ctx->part->next != NULL) { - ctx->part = ctx->part->next; - break; - } - - /* parse epilogue of multipart parent if requested */ - if (ctx->part->parent != NULL && - (ctx->part->parent->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && - (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) { - /* check for presence of epilogue */ - uoff_t part_end = ctx->part->physical_pos + - ctx->part->header_size.physical_size + - ctx->part->body_size.physical_size; - uoff_t parent_end = ctx->part->parent->physical_pos + - ctx->part->parent->header_size.physical_size + - ctx->part->parent->body_size.physical_size; - - if (parent_end > part_end) { - ctx->parse_next_block = preparsed_parse_epilogue_init; - break; - } - } - ctx->part = ctx->part->parent; - } - if (ctx->part == NULL) - ctx->parse_next_block = preparsed_parse_eof; -} - -static int preparsed_parse_body_finish(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - i_stream_skip(ctx->input, ctx->skip); - ctx->skip = 0; - - preparsed_skip_to_next(ctx); - return ctx->parse_next_block(ctx, block_r); -} - -static int preparsed_parse_prologue_finish(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - i_stream_skip(ctx->input, ctx->skip); - ctx->skip = 0; - - ctx->parse_next_block = preparsed_parse_next_header_init; - ctx->part = ctx->part->children; - return ctx->parse_next_block(ctx, block_r); -} - -static int preparsed_parse_body_more(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t end_offset = ctx->part->physical_pos + - ctx->part->header_size.physical_size + - ctx->part->body_size.physical_size; - bool full; - int 
ret; - - if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) - return ret; - - if (ctx->input->v_offset + block_r->size >= end_offset) { - block_r->size = end_offset - ctx->input->v_offset; - ctx->parse_next_block = preparsed_parse_body_finish; - } - ctx->skip = block_r->size; - return 1; -} - -static int preparsed_parse_prologue_more(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t boundary_min_start, end_offset; - const unsigned char *cur; - bool full; - int ret; - - i_assert(ctx->part->children != NULL); - end_offset = ctx->part->children->physical_pos; - - if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) - return ret; - - if (ctx->input->v_offset + block_r->size >= end_offset) { - /* we've got the full prologue: clip off the initial boundary */ - block_r->size = end_offset - ctx->input->v_offset; - cur = block_r->data + block_r->size - 1; - - /* [\r]\n--boundary[\r]\n */ - if (block_r->size < 5 || *cur != '\n') { - ctx->broken_reason = "Prologue boundary end not at expected position"; - return -1; - } - - cur--; - if (*cur == '\r') cur--; - - /* find newline just before boundary */ - for (; cur >= block_r->data; cur--) { - if (*cur == '\n') break; - } - - if (cur[0] != '\n' || cur[1] != '-' || cur[2] != '-') { - ctx->broken_reason = "Prologue boundary beginning not at expected position"; - return -1; - } - - if (cur != block_r->data && cur[-1] == '\r') cur--; - - /* clip boundary */ - block_r->size = cur - block_r->data; - - ctx->parse_next_block = preparsed_parse_prologue_finish; - ctx->skip = block_r->size; - return 1; - } - - /* retain enough data in the stream buffer to contain initial boundary */ - if (end_offset > BOUNDARY_END_MAX_LEN) - boundary_min_start = end_offset - BOUNDARY_END_MAX_LEN; - else - boundary_min_start = 0; - - if (ctx->input->v_offset + block_r->size >= boundary_min_start) { - if (boundary_min_start <= ctx->input->v_offset) - return 0; - block_r->size = boundary_min_start - 
ctx->input->v_offset; - } - ctx->skip = block_r->size; - return 1; -} - -static int preparsed_parse_epilogue_more(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t end_offset = ctx->part->physical_pos + - ctx->part->header_size.physical_size + - ctx->part->body_size.physical_size; - bool full; - int ret; - - if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) - return ret; - - if (ctx->input->v_offset + block_r->size >= end_offset) { - block_r->size = end_offset - ctx->input->v_offset; - ctx->parse_next_block = preparsed_parse_body_finish; - } - ctx->skip = block_r->size; - return 1; -} - -static int preparsed_parse_epilogue_boundary(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t end_offset = ctx->part->physical_pos + - ctx->part->header_size.physical_size + - ctx->part->body_size.physical_size; - const unsigned char *data, *cur; - size_t size; - bool full; - int ret; - - if (end_offset - ctx->input->v_offset < 7) { - ctx->broken_reason = "Epilogue position is wrong"; - return -1; - } - - if ((ret = message_parser_read_more(ctx, block_r, &full)) <= 0) - return ret; - - /* [\r]\n--boundary--[\r]\n */ - if (block_r->size < 7) { - ctx->want_count = 7; - return 0; - } - - data = block_r->data; - size = block_r->size; - cur = data; - - if (*cur == '\r') cur++; - - if (cur[0] != '\n' || cur[1] != '-' || data[2] != '-') { - ctx->broken_reason = "Epilogue boundary start not at expected position"; - return -1; - } - - /* find the end of the line */ - cur += 3; - if ((cur = memchr(cur, '\n', size - (cur-data))) == NULL) { - if (end_offset < ctx->input->v_offset + size) { - ctx->broken_reason = "Epilogue boundary end not at expected position"; - return -1; - } else if (ctx->input->v_offset + size < end_offset && - size < BOUNDARY_END_MAX_LEN && - !ctx->input->eof && !full) { - ctx->want_count = BOUNDARY_END_MAX_LEN; - return 0; - } - } - - block_r->size = 0; - ctx->parse_next_block = 
preparsed_parse_epilogue_more; - ctx->skip = cur - data + 1; - return 0; -} - -static int preparsed_parse_body_init(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t offset = ctx->part->physical_pos + - ctx->part->header_size.physical_size; - - if (offset < ctx->input->v_offset) { - /* header was actually larger than the cached size suggested */ - ctx->broken_reason = "Header larger than its cached size"; - return -1; - } - i_stream_skip(ctx->input, offset - ctx->input->v_offset); - - /* multipart messages may begin with --boundary--, which makes them - not have any children. */ - if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) == 0 || - ctx->part->children == NULL) - ctx->parse_next_block = preparsed_parse_body_more; - else - ctx->parse_next_block = preparsed_parse_prologue_more; - return ctx->parse_next_block(ctx, block_r); -} - -static int preparsed_parse_epilogue_init(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - uoff_t offset = ctx->part->physical_pos + - ctx->part->header_size.physical_size + - ctx->part->body_size.physical_size; - - ctx->part = ctx->part->parent; - - if (offset < ctx->input->v_offset) { - /* last child was actually larger than the cached size - suggested */ - ctx->broken_reason = "Part larger than its cached size"; - return -1; - } - i_stream_skip(ctx->input, offset - ctx->input->v_offset); - - ctx->parse_next_block = preparsed_parse_epilogue_boundary; - return ctx->parse_next_block(ctx, block_r); -} - -static int preparsed_parse_finish_header(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - if (ctx->part->children != NULL) { - if ((ctx->part->flags & MESSAGE_PART_FLAG_MULTIPART) != 0 && - (ctx->flags & MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS) != 0) - ctx->parse_next_block = preparsed_parse_body_init; - else { - ctx->parse_next_block = preparsed_parse_next_header_init; - ctx->part = ctx->part->children; - } - } else if ((ctx->flags & 
MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK) == 0) { - ctx->parse_next_block = preparsed_parse_body_init; - } else { - preparsed_skip_to_next(ctx); - } - return ctx->parse_next_block(ctx, block_r); -} - -static int preparsed_parse_next_header(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - struct message_header_line *hdr; - int ret; - - ret = message_parse_header_next(ctx->hdr_parser_ctx, &hdr); - if (ret == 0 || (ret < 0 && ctx->input->stream_errno != 0)) { - ctx->want_count = i_stream_get_data_size(ctx->input) + 1; - return ret; - } - - if (hdr != NULL) { - block_r->hdr = hdr; - block_r->size = 0; - return 1; - } - message_parse_header_deinit(&ctx->hdr_parser_ctx); - - ctx->parse_next_block = preparsed_parse_finish_header; - - /* return empty block as end of headers */ - block_r->hdr = NULL; - block_r->size = 0; - - i_assert(ctx->skip == 0); - if (ctx->input->v_offset != ctx->part->physical_pos + - ctx->part->header_size.physical_size) { - ctx->broken_reason = "Cached header size mismatch"; - return -1; - } - return 1; -} - -static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, - struct message_block *block_r) -{ - struct istream *hdr_input; - - i_assert(ctx->hdr_parser_ctx == NULL); - - i_assert(ctx->part->physical_pos >= ctx->input->v_offset); - i_stream_skip(ctx->input, ctx->part->physical_pos - - ctx->input->v_offset); - - /* the header may become truncated by --boundaries. limit the header - stream's size to what it's supposed to be to avoid duplicating (and - keeping in sync!) all the same complicated logic as in - parse_next_header(). 
*/ - hdr_input = i_stream_create_limit(ctx->input, ctx->part->header_size.physical_size); - ctx->hdr_parser_ctx = - message_parse_header_init(hdr_input, NULL, ctx->hdr_flags); - i_stream_unref(&hdr_input); - - ctx->parse_next_block = preparsed_parse_next_header; - return preparsed_parse_next_header(ctx, block_r); -} - -static struct message_parser_ctx * +struct message_parser_ctx * message_parser_init_int(struct istream *input, enum message_header_parser_flags hdr_flags, enum message_parser_flags flags) @@ -1063,23 +678,6 @@ message_parser_init(pool_t part_pool, struct istream *input, return ctx; } -struct message_parser_ctx * -message_parser_init_from_parts(struct message_part *parts, - struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags) -{ - struct message_parser_ctx *ctx; - - i_assert(parts != NULL); - - ctx = message_parser_init_int(input, hdr_flags, flags); - ctx->preparsed = TRUE; - ctx->parts = ctx->part = parts; - ctx->parse_next_block = preparsed_parse_next_header_init; - return ctx; -} - void message_parser_deinit(struct message_parser_ctx **_ctx, struct message_part **parts_r) { -- 2.11.0 From 1765e533f1172e4823413268493e8d4198004f37 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:33:31 +0300 Subject: [PATCH 04/15] lib-mail: message-parser - Add a message_part_finish() helper function --- src/lib-mail/message-parser.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 653f964118..9a98cc83a3 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -150,6 +150,13 @@ message_part_append(pool_t pool, struct message_part *parent) return part; } +static void message_part_finish(struct message_parser_ctx *ctx) +{ + message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); + message_size_add(&ctx->part->parent->body_size, 
&ctx->part->header_size); + ctx->part = ctx->part->parent; +} + static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) { struct message_boundary *b; @@ -267,19 +274,16 @@ static int parse_part_finish(struct message_parser_ctx *ctx, struct message_boundary *boundary, struct message_block *block_r, bool first_line) { - struct message_part *part; size_t line_size; i_assert(ctx->last_boundary == NULL); /* get back to parent MIME part, summing the child MIME part sizes into parent's body sizes */ - for (part = ctx->part; part != boundary->part; part = part->parent) { - message_size_add(&part->parent->body_size, &part->body_size); - message_size_add(&part->parent->body_size, &part->header_size); + while (ctx->part != boundary->part) { + message_part_finish(ctx); + i_assert(ctx->part != NULL); } - i_assert(part != NULL); - ctx->part = part; if (boundary->epilogue_found) { /* this boundary isn't needed anymore */ @@ -734,13 +738,8 @@ int message_parser_parse_next_block(struct message_parser_ctx *ctx, i_assert(ctx->input->eof || ctx->input->closed || ctx->input->stream_errno != 0 || ctx->broken_reason != NULL); - while (ctx->part->parent != NULL) { - message_size_add(&ctx->part->parent->body_size, - &ctx->part->body_size); - message_size_add(&ctx->part->parent->body_size, - &ctx->part->header_size); - ctx->part = ctx->part->parent; - } + while (ctx->part->parent != NULL) + message_part_finish(ctx); } if (block_r->size == 0) { -- 2.11.0 From 2dd9d73d7ec0e64c8a6fa055ce4bc382639d1826 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:34:22 +0300 Subject: [PATCH 05/15] lib-mail: message-parser - Change message_part_append() to do all work internally --- src/lib-mail/message-parser.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 9a98cc83a3..0edd4d267c 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -122,16 
+122,17 @@ int message_parser_read_more(struct message_parser_ctx *ctx, return 1; } -static struct message_part * -message_part_append(pool_t pool, struct message_part *parent) +static void +message_part_append(struct message_parser_ctx *ctx) { + struct message_part *parent = ctx->part; struct message_part *p, *part, **list; i_assert(parent != NULL); i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_MESSAGE_RFC822)) != 0); - part = p_new(pool, struct message_part, 1); + part = p_new(ctx->part_pool, struct message_part, 1); part->parent = parent; for (p = parent; p != NULL; p = p->parent) p->children_count++; @@ -147,7 +148,7 @@ message_part_append(pool_t pool, struct message_part *parent) list = &(*list)->next; *list = part; - return part; + ctx->part = part; } static void message_part_finish(struct message_parser_ctx *ctx) @@ -175,7 +176,7 @@ static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx, struct message_block *block_r) { - ctx->part = message_part_append(ctx->part_pool, ctx->part); + message_part_append(ctx); return parse_next_header_init(ctx, block_r); } @@ -225,7 +226,7 @@ boundary_line_find(struct message_parser_ctx *ctx, static int parse_next_mime_header_init(struct message_parser_ctx *ctx, struct message_block *block_r) { - ctx->part = message_part_append(ctx->part_pool, ctx->part); + message_part_append(ctx); ctx->part->flags |= MESSAGE_PART_FLAG_IS_MIME; return parse_next_header_init(ctx, block_r); -- 2.11.0 From 9f565d94ed7962f6c982387c25d093c34edbb5f0 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 11:36:48 +0300 Subject: [PATCH 06/15] lib-mail: message-parser - Optimize updating children_count --- src/lib-mail/message-parser.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 0edd4d267c..f19759c257 100644 --- 
a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -126,7 +126,7 @@ static void message_part_append(struct message_parser_ctx *ctx) { struct message_part *parent = ctx->part; - struct message_part *p, *part, **list; + struct message_part *part, **list; i_assert(parent != NULL); i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | @@ -134,8 +134,6 @@ message_part_append(struct message_parser_ctx *ctx) part = p_new(ctx->part_pool, struct message_part, 1); part->parent = parent; - for (p = parent; p != NULL; p = p->parent) - p->children_count++; /* set child position */ part->physical_pos = @@ -155,6 +153,7 @@ static void message_part_finish(struct message_parser_ctx *ctx) { message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); + ctx->part->parent->children_count += 1 + ctx->part->children_count; ctx->part = ctx->part->parent; } -- 2.11.0 From f543817877ce91e59386eb83144e00e288a19839 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 12:00:38 +0300 Subject: [PATCH 07/15] lib-mail: message-parser - Optimize appending new part to linked list --- src/lib-mail/message-parser-private.h | 3 +++ src/lib-mail/message-parser.c | 24 ++++++++++++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index 98a576eda4..fd92a48776 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -29,6 +29,9 @@ struct message_parser_ctx { const char *last_boundary; struct message_boundary *boundaries; + struct message_part **next_part; + ARRAY(struct message_part **) next_part_stack; + size_t skip; char last_chr; unsigned int want_count; diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index f19759c257..0f690ab689 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ 
-1,7 +1,7 @@ /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ #include "lib.h" -#include "buffer.h" +#include "array.h" #include "str.h" #include "istream.h" #include "rfc822-parser.h" @@ -126,7 +126,7 @@ static void message_part_append(struct message_parser_ctx *ctx) { struct message_part *parent = ctx->part; - struct message_part *part, **list; + struct message_part *part; i_assert(parent != NULL); i_assert((parent->flags & (MESSAGE_PART_FLAG_MULTIPART | @@ -141,16 +141,26 @@ message_part_append(struct message_parser_ctx *ctx) parent->body_size.physical_size + parent->header_size.physical_size; - list = &part->parent->children; - while (*list != NULL) - list = &(*list)->next; + /* add to parent's linked list */ + *ctx->next_part = part; + /* update the parent's end-of-linked-list pointer */ + struct message_part **next_part = &part->next; + array_push_back(&ctx->next_part_stack, &next_part); + /* This part is now the new parent for the next message_part_append() + call. Its linked list begins with the children pointer. 
*/ + ctx->next_part = &part->children; - *list = part; ctx->part = part; } static void message_part_finish(struct message_parser_ctx *ctx) { + struct message_part **const *parent_next_partp; + + parent_next_partp = array_back(&ctx->next_part_stack); + array_pop_back(&ctx->next_part_stack); + ctx->next_part = *parent_next_partp; + message_size_add(&ctx->part->parent->body_size, &ctx->part->body_size); message_size_add(&ctx->part->parent->body_size, &ctx->part->header_size); ctx->part->parent->children_count += 1 + ctx->part->children_count; @@ -678,7 +688,9 @@ message_parser_init(pool_t part_pool, struct istream *input, ctx = message_parser_init_int(input, hdr_flags, flags); ctx->part_pool = part_pool; ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); + ctx->next_part = &ctx->part->children; ctx->parse_next_block = parse_next_header_init; + p_array_init(&ctx->next_part_stack, ctx->parser_pool, 4); return ctx; } -- 2.11.0 From f080cb2da2064005f35e1d16cd4efded21552491 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 12:10:07 +0300 Subject: [PATCH 08/15] lib-mail: message-parser - Minor code cleanup to finding the end of boundary line --- src/lib-mail/message-parser.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 0f690ab689..88c1b31564 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -211,17 +211,16 @@ boundary_line_find(struct message_parser_ctx *ctx, } /* need to find the end of line */ - if (memchr(data + 2, '\n', size - 2) == NULL && - size < BOUNDARY_END_MAX_LEN && + data += 2; + size -= 2; + if (memchr(data, '\n', size) == NULL && + size+2 < BOUNDARY_END_MAX_LEN && !ctx->input->eof && !full) { /* no LF found */ ctx->want_count = BOUNDARY_END_MAX_LEN; return 0; } - data += 2; - size -= 2; - *boundary_r = boundary_find(ctx->boundaries, data, size); if (*boundary_r == NULL) return -1; -- 2.11.0 From 
6c530af237d5f0486d5fff68d114de905ced9b97 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 12:53:12 +0300 Subject: [PATCH 09/15] lib-mail: message-parser - Truncate excessively long MIME boundaries RFC 2046 requires that the boundaries are a maximum of 70 characters (excluding the "--" prefix and suffix). We allow 80 characters for a bit of extra safety. Anything longer than that is truncated and treated the same as if it was just 80 characters. --- src/lib-mail/message-parser-private.h | 3 +- src/lib-mail/message-parser.c | 4 +- src/lib-mail/test-message-parser.c | 95 +++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index fd92a48776..d8116259ad 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -5,7 +5,8 @@ /* RFC-2046 requires boundaries are max. 70 chars + "--" prefix + "--" suffix. We'll add a bit more just in case. 
*/ -#define BOUNDARY_END_MAX_LEN (70 + 2 + 2 + 10) +#define BOUNDARY_STRING_MAX_LEN (70 + 10) +#define BOUNDARY_END_MAX_LEN (BOUNDARY_STRING_MAX_LEN + 2 + 2) struct message_boundary { struct message_boundary *next; diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 88c1b31564..43142491b2 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -477,8 +477,10 @@ static void parse_content_type(struct message_parser_ctx *ctx, rfc2231_parse(&parser, &results); for (; *results != NULL; results += 2) { if (strcasecmp(results[0], "boundary") == 0) { + /* truncate excessively long boundaries */ ctx->last_boundary = - p_strdup(ctx->parser_pool, results[1]); + p_strndup(ctx->parser_pool, results[1], + BOUNDARY_STRING_MAX_LEN); break; } } diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index c275707265..6bf1643e88 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -736,6 +736,100 @@ static void test_message_parser_no_eoh(void) test_end(); } +static void test_message_parser_long_mime_boundary(void) +{ + /* Close the boundaries in wrong reverse order. But because all + boundaries are actually truncated to the same size (..890) it + works the same as if all of them were duplicate boundaries. 
*/ +static const char input_msg[] = +"Content-Type: multipart/mixed; boundary=\"1234567890123456789012345678901234567890123456789012345678901234567890123456789012\"\n" +"\n" +"--1234567890123456789012345678901234567890123456789012345678901234567890123456789012\n" +"Content-Type: multipart/mixed; boundary=\"123456789012345678901234567890123456789012345678901234567890123456789012345678901\"\n" +"\n" +"--123456789012345678901234567890123456789012345678901234567890123456789012345678901\n" +"Content-Type: multipart/mixed; boundary=\"12345678901234567890123456789012345678901234567890123456789012345678901234567890\"\n" +"\n" +"--12345678901234567890123456789012345678901234567890123456789012345678901234567890\n" +"Content-Type: text/plain\n" +"\n" +"1\n" +"--1234567890123456789012345678901234567890123456789012345678901234567890123456789012\n" +"Content-Type: text/plain\n" +"\n" +"22\n" +"--123456789012345678901234567890123456789012345678901234567890123456789012345678901\n" +"Content-Type: text/plain\n" +"\n" +"333\n" +"--12345678901234567890123456789012345678901234567890123456789012345678901234567890\n" +"Content-Type: text/plain\n" +"\n" +"4444\n"; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts, *part; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser long mime boundary"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, 0, 0); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + part = parts; + test_assert(part->children_count == 6); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 126); + test_assert(part->header_size.virtual_size == 126+2); + test_assert(part->body_size.lines 
== 22); + test_assert(part->body_size.physical_size == 871); + test_assert(part->body_size.virtual_size == 871+22); + + part = parts->children; + test_assert(part->children_count == 5); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 125); + test_assert(part->header_size.virtual_size == 125+2); + test_assert(part->body_size.lines == 19); + test_assert(part->body_size.physical_size == 661); + test_assert(part->body_size.virtual_size == 661+19); + + part = parts->children->children; + test_assert(part->children_count == 4); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 124); + test_assert(part->header_size.virtual_size == 124+2); + test_assert(part->body_size.lines == 16); + test_assert(part->body_size.physical_size == 453); + test_assert(part->body_size.virtual_size == 453+16); + + part = parts->children->children->children; + for (unsigned int i = 1; i <= 3; i++, part = part->next) { + test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 26); + test_assert(part->header_size.virtual_size == 26+2); + test_assert(part->body_size.lines == 0); + test_assert(part->body_size.physical_size == i); + test_assert(part->body_size.virtual_size == i); + } + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + int main(void) { static void (*const test_functions[])(void) = { @@ -749,6 +843,7 @@ int main(void) test_message_parser_continuing_mime_boundary, test_message_parser_continuing_truncated_mime_boundary, test_message_parser_continuing_mime_boundary_reverse, + test_message_parser_long_mime_boundary, 
test_message_parser_no_eoh, NULL }; -- 2.11.0 From f6a31b782088d9f626f4718654cb91cf723f3f9c Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 13:06:02 +0300 Subject: [PATCH 10/15] lib-mail: message-parser - Optimize boundary lookups when exact boundary is found When an exact boundary is found, there's no need to continue looking for more boundaries. --- src/lib-mail/message-parser.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 43142491b2..f0a0cf41f0 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -31,8 +31,14 @@ boundary_find(struct message_boundary *boundaries, while (boundaries != NULL) { if (boundaries->len <= len && memcmp(boundaries->boundary, data, boundaries->len) == 0 && - (best == NULL || best->len < boundaries->len)) + (best == NULL || best->len < boundaries->len)) { best = boundaries; + if (best->len == len) { + /* This is exactly the wanted boundary. There + can't be a better one. 
*/ + break; + } + } boundaries = boundaries->next; } @@ -213,15 +219,27 @@ boundary_line_find(struct message_parser_ctx *ctx, /* need to find the end of line */ data += 2; size -= 2; - if (memchr(data, '\n', size) == NULL && + const unsigned char *lf_pos = memchr(data, '\n', size); + if (lf_pos == NULL && size+2 < BOUNDARY_END_MAX_LEN && !ctx->input->eof && !full) { /* no LF found */ ctx->want_count = BOUNDARY_END_MAX_LEN; return 0; } + size_t find_size = size; + + if (lf_pos != NULL) { + find_size = lf_pos - data; + if (find_size > 0 && data[find_size-1] == '\r') + find_size--; + if (find_size > 2 && data[find_size-1] == '-' && + data[find_size-2] == '-') + find_size -= 2; + } else if (find_size > BOUNDARY_END_MAX_LEN) + find_size = BOUNDARY_END_MAX_LEN; - *boundary_r = boundary_find(ctx->boundaries, data, size); + *boundary_r = boundary_find(ctx->boundaries, data, find_size); if (*boundary_r == NULL) return -1; -- 2.11.0 From 86adf700cc31775744ed48ff5aebe62b97e52c51 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 14:53:27 +0300 Subject: [PATCH 11/15] lib-mail: message-parser - Add boundary_remove_until() helper function --- src/lib-mail/message-parser.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index f0a0cf41f0..0fb0f9181a 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -173,6 +173,13 @@ static void message_part_finish(struct message_parser_ctx *ctx) ctx->part = ctx->part->parent; } +static void +boundary_remove_until(struct message_parser_ctx *ctx, + struct message_boundary *boundary) +{ + ctx->boundaries = boundary; +} + static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) { struct message_boundary *b; @@ -314,10 +321,10 @@ static int parse_part_finish(struct message_parser_ctx *ctx, if (boundary->epilogue_found) { /* this boundary isn't needed anymore */ - ctx->boundaries = boundary->next; 
+ boundary_remove_until(ctx, boundary->next); } else { /* forget about the boundaries we possibly skipped */ - ctx->boundaries = boundary; + boundary_remove_until(ctx, boundary); } /* the boundary itself should already be in buffer. add that. */ -- 2.11.0 From a6f9ff266c1a2458a1762cdb1206b50497cacb09 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 15:00:57 +0300 Subject: [PATCH 12/15] lib-mail: message-parser - Don't use memory pool for parser This reduces memory usage when parsing many MIME parts where boundaries are being added and removed constantly. --- src/lib-mail/message-parser-private.h | 6 ++--- src/lib-mail/message-parser.c | 41 ++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index d8116259ad..fe106819e2 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -12,14 +12,14 @@ struct message_boundary { struct message_boundary *next; struct message_part *part; - const char *boundary; + char *boundary; size_t len; bool epilogue_found:1; }; struct message_parser_ctx { - pool_t parser_pool, part_pool; + pool_t part_pool; struct istream *input; struct message_part *parts, *part; const char *broken_reason; @@ -27,7 +27,7 @@ struct message_parser_ctx { enum message_header_parser_flags hdr_flags; enum message_parser_flags flags; - const char *last_boundary; + char *last_boundary; struct message_boundary *boundaries; struct message_part **next_part; diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 0fb0f9181a..c9ff98576b 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -173,10 +173,24 @@ static void message_part_finish(struct message_parser_ctx *ctx) ctx->part = ctx->part->parent; } +static void message_boundary_free(struct message_boundary *b) +{ + i_free(b->boundary); + i_free(b); +} + static void 
boundary_remove_until(struct message_parser_ctx *ctx, struct message_boundary *boundary) { + while (ctx->boundaries != boundary) { + struct message_boundary *cur = ctx->boundaries; + + i_assert(cur != NULL); + ctx->boundaries = cur->next; + message_boundary_free(cur); + + } ctx->boundaries = boundary; } @@ -184,15 +198,14 @@ static void parse_next_body_multipart_init(struct message_parser_ctx *ctx) { struct message_boundary *b; - b = p_new(ctx->parser_pool, struct message_boundary, 1); + b = i_new(struct message_boundary, 1); b->part = ctx->part; b->boundary = ctx->last_boundary; + ctx->last_boundary = NULL; b->len = strlen(b->boundary); b->next = ctx->boundaries; ctx->boundaries = b; - - ctx->last_boundary = NULL; } static int parse_next_body_message_rfc822_init(struct message_parser_ctx *ctx, @@ -309,6 +322,8 @@ static int parse_part_finish(struct message_parser_ctx *ctx, struct message_block *block_r, bool first_line) { size_t line_size; + size_t boundary_len = boundary->len; + bool boundary_epilogue_found = boundary->epilogue_found; i_assert(ctx->last_boundary == NULL); @@ -341,7 +356,7 @@ static int parse_part_finish(struct message_parser_ctx *ctx, i_assert(block_r->data[0] == '\n'); line_size = 1; } - line_size += 2 + boundary->len + (boundary->epilogue_found ? 2 : 0); + line_size += 2 + boundary_len + (boundary_epilogue_found ? 
2 : 0); i_assert(block_r->size >= ctx->skip + line_size); block_r->size = line_size; parse_body_add_block(ctx, block_r); @@ -503,9 +518,9 @@ static void parse_content_type(struct message_parser_ctx *ctx, for (; *results != NULL; results += 2) { if (strcasecmp(results[0], "boundary") == 0) { /* truncate excessively long boundaries */ + i_free(ctx->last_boundary); ctx->last_boundary = - p_strndup(ctx->parser_pool, results[1], - BOUNDARY_STRING_MAX_LEN); + i_strndup(results[1], BOUNDARY_STRING_MAX_LEN); break; } } @@ -628,7 +643,7 @@ static int parse_next_header(struct message_parser_ctx *ctx, i_assert(!ctx->multipart); part->flags = 0; } - ctx->last_boundary = NULL; + i_free(ctx->last_boundary); if (!ctx->part_seen_content_type || (part->flags & MESSAGE_PART_FLAG_IS_MIME) == 0) { @@ -692,11 +707,8 @@ message_parser_init_int(struct istream *input, enum message_parser_flags flags) { struct message_parser_ctx *ctx; - pool_t pool; - pool = pool_alloconly_create("Message Parser", 1024); - ctx = p_new(pool, struct message_parser_ctx, 1); - ctx->parser_pool = pool; + ctx = i_new(struct message_parser_ctx, 1); ctx->hdr_flags = hdr_flags; ctx->flags = flags; ctx->input = input; @@ -716,7 +728,7 @@ message_parser_init(pool_t part_pool, struct istream *input, ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); ctx->next_part = &ctx->part->children; ctx->parse_next_block = parse_next_header_init; - p_array_init(&ctx->next_part_stack, ctx->parser_pool, 4); + i_array_init(&ctx->next_part_stack, 4); return ctx; } @@ -743,8 +755,11 @@ int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx, if (ctx->hdr_parser_ctx != NULL) message_parse_header_deinit(&ctx->hdr_parser_ctx); + boundary_remove_until(ctx, NULL); i_stream_unref(&ctx->input); - pool_unref(&ctx->parser_pool); + array_free(&ctx->next_part_stack); + i_free(ctx->last_boundary); + i_free(ctx); i_assert(ret < 0 || *parts_r != NULL); return ret; } -- 2.11.0 From 
45cd8d8fd39d301607b22ec6f4ac800331a30225 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 16:50:56 +0300 Subject: [PATCH 13/15] lib-mail, global: message_parser_init*() - Convert flags to settings structure --- src/doveadm/doveadm-mail-fetch.c | 7 +++-- src/lib-imap/test-imap-bodystructure.c | 10 +++--- src/lib-imap/test-imap-envelope.c | 10 +++--- src/lib-mail/istream-attachment-extractor.c | 8 +++-- src/lib-mail/istream-binary-converter.c | 8 +++-- src/lib-mail/message-parser-from-parts.c | 5 ++- src/lib-mail/message-parser-private.h | 3 +- src/lib-mail/message-parser.c | 12 +++----- src/lib-mail/message-parser.h | 11 ++++--- src/lib-mail/message-search.c | 9 +++--- src/lib-mail/message-snippet.c | 3 +- src/lib-mail/test-message-decoder.c | 4 ++- src/lib-mail/test-message-parser.c | 47 +++++++++++++++++------------ src/lib-mail/test-message-part.c | 3 +- src/lib-storage/index/index-mail-headers.c | 22 ++++++-------- src/plugins/fts/fts-build-mail.c | 7 +++-- 16 files changed, 95 insertions(+), 74 deletions(-) diff --git a/src/doveadm/doveadm-mail-fetch.c b/src/doveadm/doveadm-mail-fetch.c index 75b69e4242..d8b396a199 100644 --- a/src/doveadm/doveadm-mail-fetch.c +++ b/src/doveadm/doveadm-mail-fetch.c @@ -265,6 +265,9 @@ static int fetch_text(struct fetch_cmd_context *ctx) static int fetch_text_utf8(struct fetch_cmd_context *ctx) { + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; struct istream *input; struct message_parser_ctx *parser; struct message_decoder_context *decoder; @@ -275,9 +278,7 @@ static int fetch_text_utf8(struct fetch_cmd_context *ctx) if (mail_get_stream(ctx->mail, NULL, NULL, &input) < 0) return -1; - parser = message_parser_init(pool_datastack_create(), input, - MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, - 0); + parser = message_parser_init(pool_datastack_create(), input, &parser_set); decoder = message_decoder_init(NULL, 0); while ((ret = 
message_parser_parse_next_block(parser, &raw_block)) > 0) { diff --git a/src/lib-imap/test-imap-bodystructure.c b/src/lib-imap/test-imap-bodystructure.c index 6f456a4453..dfc9957488 100644 --- a/src/lib-imap/test-imap-bodystructure.c +++ b/src/lib-imap/test-imap-bodystructure.c @@ -381,6 +381,11 @@ static const unsigned int normalize_tests_count = N_ELEMENTS(normalize_tests); static struct message_part * msg_parse(pool_t pool, const char *message, bool parse_bodystructure) { + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | + MESSAGE_HEADER_PARSER_FLAG_DROP_CR, + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, + }; struct message_parser_ctx *parser; struct istream *input; struct message_block block; @@ -388,10 +393,7 @@ msg_parse(pool_t pool, const char *message, bool parse_bodystructure) int ret; input = i_stream_create_from_data(message, strlen(message)); - parser = message_parser_init(pool, input, - MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | - MESSAGE_HEADER_PARSER_FLAG_DROP_CR, - MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); + parser = message_parser_init(pool, input, &parser_set); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { if (parse_bodystructure) { message_part_data_parse_from_header(pool, block.part, diff --git a/src/lib-imap/test-imap-envelope.c b/src/lib-imap/test-imap-envelope.c index 0d0891701b..1f295e58ba 100644 --- a/src/lib-imap/test-imap-envelope.c +++ b/src/lib-imap/test-imap-envelope.c @@ -118,6 +118,11 @@ static const unsigned int parse_tests_count = N_ELEMENTS(parse_tests); static struct message_part_envelope * msg_parse(pool_t pool, const char *message) { + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | + MESSAGE_HEADER_PARSER_FLAG_DROP_CR, + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, + }; struct message_parser_ctx *parser; struct message_part_envelope *envlp = NULL; struct istream *input; 
@@ -126,10 +131,7 @@ msg_parse(pool_t pool, const char *message) int ret; input = i_stream_create_from_data(message, strlen(message)); - parser = message_parser_init(pool, input, - MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | - MESSAGE_HEADER_PARSER_FLAG_DROP_CR, - MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); + parser = message_parser_init(pool, input, &parser_set); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { i_assert(block.part->parent == NULL); message_part_envelope_parse_from_header(pool, &envlp, block.hdr); diff --git a/src/lib-mail/istream-attachment-extractor.c b/src/lib-mail/istream-attachment-extractor.c index e9655a5a67..7d4ac01072 100644 --- a/src/lib-mail/istream-attachment-extractor.c +++ b/src/lib-mail/istream-attachment-extractor.c @@ -696,6 +696,10 @@ i_stream_create_attachment_extractor(struct istream *input, struct istream_attachment_settings *set, void *context) { + const struct message_parser_settings parser_set = { + .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | + MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, + }; struct attachment_istream *astream; i_assert(set->min_size > 0); @@ -722,9 +726,7 @@ i_stream_create_attachment_extractor(struct istream *input, astream->istream.istream.seekable = FALSE; astream->pool = pool_alloconly_create("istream attachment", 1024); - astream->parser = message_parser_init(astream->pool, input, 0, - MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | - MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); + astream->parser = message_parser_init(astream->pool, input, &parser_set); return i_stream_create(&astream->istream, input, i_stream_get_fd(input), 0); } diff --git a/src/lib-mail/istream-binary-converter.c b/src/lib-mail/istream-binary-converter.c index 201a588152..856b854738 100644 --- a/src/lib-mail/istream-binary-converter.c +++ b/src/lib-mail/istream-binary-converter.c @@ -286,6 +286,10 @@ static void i_stream_binary_converter_close(struct iostream_private *stream, struct istream 
*i_stream_create_binary_converter(struct istream *input) { + const struct message_parser_settings parser_set = { + .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | + MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, + }; struct binary_converter_istream *bstream; bstream = i_new(struct binary_converter_istream, 1); @@ -299,9 +303,7 @@ struct istream *i_stream_create_binary_converter(struct istream *input) bstream->istream.istream.seekable = FALSE; bstream->pool = pool_alloconly_create("istream binary converter", 128); - bstream->parser = message_parser_init(bstream->pool, input, 0, - MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | - MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); + bstream->parser = message_parser_init(bstream->pool, input, &parser_set); return i_stream_create(&bstream->istream, input, i_stream_get_fd(input), 0); } diff --git a/src/lib-mail/message-parser-from-parts.c b/src/lib-mail/message-parser-from-parts.c index b23055ab9b..8e21ec8f18 100644 --- a/src/lib-mail/message-parser-from-parts.c +++ b/src/lib-mail/message-parser-from-parts.c @@ -351,14 +351,13 @@ static int preparsed_parse_next_header_init(struct message_parser_ctx *ctx, struct message_parser_ctx * message_parser_init_from_parts(struct message_part *parts, struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags) + const struct message_parser_settings *set) { struct message_parser_ctx *ctx; i_assert(parts != NULL); - ctx = message_parser_init_int(input, hdr_flags, flags); + ctx = message_parser_init_int(input, set); ctx->preparsed = TRUE; ctx->parts = ctx->part = parts; ctx->parse_next_block = preparsed_parse_next_header_init; diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index fe106819e2..dbf8464cfb 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -51,8 +51,7 @@ struct message_parser_ctx { struct message_parser_ctx * message_parser_init_int(struct istream 
*input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags); + const struct message_parser_settings *set); int message_parser_read_more(struct message_parser_ctx *ctx, struct message_block *block_r, bool *full_r); diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index c9ff98576b..41b9ed133a 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -703,14 +703,13 @@ static int parse_next_header_init(struct message_parser_ctx *ctx, struct message_parser_ctx * message_parser_init_int(struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags) + const struct message_parser_settings *set) { struct message_parser_ctx *ctx; ctx = i_new(struct message_parser_ctx, 1); - ctx->hdr_flags = hdr_flags; - ctx->flags = flags; + ctx->hdr_flags = set->hdr_flags; + ctx->flags = set->flags; ctx->input = input; i_stream_ref(input); return ctx; @@ -718,12 +717,11 @@ message_parser_init_int(struct istream *input, struct message_parser_ctx * message_parser_init(pool_t part_pool, struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags) + const struct message_parser_settings *set) { struct message_parser_ctx *ctx; - ctx = message_parser_init_int(input, hdr_flags, flags); + ctx = message_parser_init_int(input, set); ctx->part_pool = part_pool; ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); ctx->next_part = &ctx->part->children; diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h index 3efd851862..d159b2607d 100644 --- a/src/lib-mail/message-parser.h +++ b/src/lib-mail/message-parser.h @@ -17,6 +17,11 @@ enum message_parser_flags { MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES = 0x08 }; +struct message_parser_settings { + enum message_header_parser_flags hdr_flags; + enum message_parser_flags flags; +}; + struct message_parser_ctx; struct message_block { @@ -45,8 +50,7 @@ extern 
message_part_header_callback_t *null_message_part_header_callback; are allocated from. */ struct message_parser_ctx * message_parser_init(pool_t part_pool, struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags); + const struct message_parser_settings *set); /* Deinitialize message parser. The ctx must NOT have been created by message_parser_init_from_parts(). */ void message_parser_deinit(struct message_parser_ctx **ctx, @@ -55,8 +59,7 @@ void message_parser_deinit(struct message_parser_ctx **ctx, struct message_parser_ctx * message_parser_init_from_parts(struct message_part *parts, struct istream *input, - enum message_header_parser_flags hdr_flags, - enum message_parser_flags flags); + const struct message_parser_settings *set); /* Same as message_parser_deinit(), but return an error message describing why the preparsed parts didn't match the message. This can also safely be called even when preparsed parts weren't used - it'll always just return diff --git a/src/lib-mail/message-search.c b/src/lib-mail/message-search.c index 66c043c158..14d1a11470 100644 --- a/src/lib-mail/message-search.c +++ b/src/lib-mail/message-search.c @@ -196,8 +196,9 @@ message_search_msg_real(struct message_search_context *ctx, struct istream *input, struct message_part *parts, const char **error_r) { - const enum message_header_parser_flags hdr_parser_flags = - MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE; + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; struct message_parser_ctx *parser_ctx; struct message_block raw_block; struct message_part *new_parts; @@ -207,10 +208,10 @@ message_search_msg_real(struct message_search_context *ctx, if (parts != NULL) { parser_ctx = message_parser_init_from_parts(parts, - input, hdr_parser_flags, 0); + input, &parser_set); } else { parser_ctx = message_parser_init(pool_datastack_create(), - input, hdr_parser_flags, 0); + input, 
&parser_set); } while ((ret = message_parser_parse_next_block(parser_ctx, diff --git a/src/lib-mail/message-snippet.c b/src/lib-mail/message-snippet.c index 2100b70554..e6965fd707 100644 --- a/src/lib-mail/message-snippet.c +++ b/src/lib-mail/message-snippet.c @@ -137,6 +137,7 @@ int message_snippet_generate(struct istream *input, unsigned int max_snippet_chars, string_t *snippet) { + const struct message_parser_settings parser_set = { .flags = 0 }; struct message_parser_ctx *parser; struct message_part *parts; struct message_decoder_context *decoder; @@ -151,7 +152,7 @@ int message_snippet_generate(struct istream *input, ctx.snippet.chars_left = max_snippet_chars; ctx.quoted_snippet.snippet = str_new(pool, max_snippet_chars); ctx.quoted_snippet.chars_left = max_snippet_chars - 1; /* -1 for '>' */ - parser = message_parser_init(pool_datastack_create(), input, 0, 0); + parser = message_parser_init(pool_datastack_create(), input, &parser_set); decoder = message_decoder_init(NULL, 0); while ((ret = message_parser_parse_next_block(parser, &raw_block)) > 0) { if (!message_decoder_decode_next_block(decoder, &raw_block, &block)) diff --git a/src/lib-mail/test-message-decoder.c b/src/lib-mail/test-message-decoder.c index e1faca29b4..3007283cad 100644 --- a/src/lib-mail/test-message-decoder.c +++ b/src/lib-mail/test-message-decoder.c @@ -105,6 +105,7 @@ static void test_message_decoder_multipart(void) "\n" "?garbage\n" "--foo--\n"; + const struct message_parser_settings parser_set = { .flags = 0, }; struct message_parser_ctx *parser; struct message_decoder_context *decoder; struct message_part *parts; @@ -116,7 +117,8 @@ static void test_message_decoder_multipart(void) test_begin("message decoder multipart"); istream = test_istream_create(test_message_input); - parser = message_parser_init(pool_datastack_create(), istream, 0, 0); + parser = message_parser_init(pool_datastack_create(), istream, + &parser_set); decoder = message_decoder_init(NULL, 0); 
test_istream_set_allow_eof(istream, FALSE); diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index 6bf1643e88..5e496275fe 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -39,6 +39,8 @@ static const char test_msg[] = "\n"; #define TEST_MSG_LEN (sizeof(test_msg)-1) +static const struct message_parser_settings set_empty = { .flags = 0 }; + static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) { while (p1 != NULL || p2 != NULL) { @@ -71,6 +73,9 @@ static bool msg_parts_cmp(struct message_part *p1, struct message_part *p2) static void test_parsed_parts(struct istream *input, struct message_part *parts) { + const struct message_parser_settings parser_set = { + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, + }; struct message_parser_ctx *parser; struct message_block block; struct message_part *parts2; @@ -81,8 +86,7 @@ static void test_parsed_parts(struct istream *input, struct message_part *parts) if (i_stream_get_size(input, TRUE, &input_size) < 0) i_unreached(); - parser = message_parser_init_from_parts(parts, input, 0, - MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); + parser = message_parser_init_from_parts(parts, input, &parser_set); for (i = 1; i <= input_size*2+1; i++) { test_istream_set_size(input, i/2); if (i > TEST_MSG_LEN*2) @@ -111,9 +115,11 @@ static void test_message_parser_small_blocks(void) output = t_str_new(128); /* full parsing */ - parser = message_parser_init(pool, input, 0, - MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | - MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES); + const struct message_parser_settings full_parser_set = { + .flags = MESSAGE_PARSER_FLAG_INCLUDE_MULTIPART_BLOCKS | + MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES, + }; + parser = message_parser_init(pool, input, &full_parser_set); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) { if (block.hdr != NULL) message_header_line_write(output, block.hdr); @@ -129,7 +135,7 @@ static void 
test_message_parser_small_blocks(void) i_stream_seek(input, 0); test_istream_set_allow_eof(input, FALSE); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); for (i = 1; i <= TEST_MSG_LEN*2+1; i++) { test_istream_set_size(input, i/2); if (i > TEST_MSG_LEN*2) @@ -147,8 +153,11 @@ static void test_message_parser_small_blocks(void) test_istream_set_allow_eof(input, FALSE); end_of_headers_idx = (strstr(test_msg, "\n-----") - test_msg); - parser = message_parser_init_from_parts(parts, input, 0, - MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK); + const struct message_parser_settings preparsed_parser_set = { + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, + }; + parser = message_parser_init_from_parts(parts, input, + &preparsed_parser_set); for (i = 1; i <= TEST_MSG_LEN*2+1; i++) { test_istream_set_size(input, i/2); if (i > TEST_MSG_LEN*2) @@ -190,7 +199,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -255,7 +264,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -311,7 +320,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; 
test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -349,7 +358,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -394,7 +403,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -455,7 +464,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -516,7 +525,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -578,7 +587,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ 
-661,7 +670,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); @@ -721,7 +730,7 @@ static void test_message_parser_no_eoh(void) pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); test_assert(message_parser_parse_next_block(parser, &block) > 0 && block.hdr != NULL && strcmp(block.hdr->name, "a") == 0 && block.hdr->value_len == 1 && block.hdr->value[0] == 'b'); @@ -777,7 +786,7 @@ static const char input_msg[] = pool = pool_alloconly_create("message parser", 10240); input = test_istream_create(input_msg); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set_empty); while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; test_assert(ret < 0); message_parser_deinit(&parser, &parts); diff --git a/src/lib-mail/test-message-part.c b/src/lib-mail/test-message-part.c index 6cad8ffbd3..4a51a39f99 100644 --- a/src/lib-mail/test-message-part.c +++ b/src/lib-mail/test-message-part.c @@ -65,6 +65,7 @@ static const char test_msg[] = static void test_message_part_idx(void) { + const struct message_parser_settings set = { .flags = 0 }; struct message_parser_ctx *parser; struct istream *input; struct message_part *parts, *part, *prev_part; @@ -77,7 +78,7 @@ static void test_message_part_idx(void) pool = pool_alloconly_create("message parser", 10240); input = i_stream_create_from_data(test_msg, TEST_MSG_LEN); - parser = message_parser_init(pool, input, 0, 0); + parser = message_parser_init(pool, input, &set); while ((ret = 
message_parser_parse_next_block(parser, &block)) > 0) { part_idx = message_part_to_idx(block.part); test_assert(part_idx >= prev_idx); diff --git a/src/lib-storage/index/index-mail-headers.c b/src/lib-storage/index/index-mail-headers.c index 54a5883177..df21b9129e 100644 --- a/src/lib-storage/index/index-mail-headers.c +++ b/src/lib-storage/index/index-mail-headers.c @@ -16,11 +16,11 @@ #include "index-storage.h" #include "index-mail.h" -static const enum message_header_parser_flags hdr_parser_flags = - MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | - MESSAGE_HEADER_PARSER_FLAG_DROP_CR; -static const enum message_parser_flags msg_parser_flags = - MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK; +static const struct message_parser_settings msg_parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_SKIP_INITIAL_LWSP | + MESSAGE_HEADER_PARSER_FLAG_DROP_CR, + .flags = MESSAGE_PARSER_FLAG_SKIP_BODY_BLOCK, +}; static int header_line_cmp(const struct index_mail_line *l1, const struct index_mail_line *l2) @@ -399,7 +399,7 @@ index_mail_cache_parse_init(struct mail *_mail, struct istream *input) mail->data.parser_input = input; mail->data.parser_ctx = message_parser_init(mail->mail.data_pool, input, - hdr_parser_flags, msg_parser_flags); + &msg_parser_set); i_stream_unref(&input); return input2; } @@ -428,14 +428,12 @@ static void index_mail_init_parser(struct index_mail *mail) data->parser_input = data->stream; data->parser_ctx = message_parser_init(mail->mail.data_pool, data->stream, - hdr_parser_flags, - msg_parser_flags); + &msg_parser_set); } else { data->parser_ctx = message_parser_init_from_parts(data->parts, data->stream, - hdr_parser_flags, - msg_parser_flags); + &msg_parser_set); } } @@ -468,7 +466,7 @@ int index_mail_parse_headers(struct index_mail *mail, i_assert(!data->save_bodystructure_body || data->parser_ctx != NULL); message_parse_header(data->stream, &data->hdr_size, - hdr_parser_flags, + msg_parser_set.hdr_flags, index_mail_parse_header_cb, mail); } if 
(index_mail_stream_check_failure(mail) < 0) @@ -526,7 +524,7 @@ int index_mail_headers_get_envelope(struct index_mail *mail) if (mail->data.envelope == NULL) { /* we got the headers from cache - parse them to get the envelope */ - message_parse_header(stream, NULL, hdr_parser_flags, + message_parse_header(stream, NULL, msg_parser_set.hdr_flags, imap_envelope_parse_callback, mail); if (stream->stream_errno != 0) { index_mail_stream_log_failure_for(mail, stream); diff --git a/src/plugins/fts/fts-build-mail.c b/src/plugins/fts/fts-build-mail.c index 3cb4ea657f..e088e7397b 100644 --- a/src/plugins/fts/fts-build-mail.c +++ b/src/plugins/fts/fts-build-mail.c @@ -475,6 +475,9 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx, const char **retriable_err_msg_r, bool *may_need_retry_r) { + const struct message_parser_settings parser_set = { + .hdr_flags = MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, + }; struct fts_mail_build_context ctx; struct istream *input; struct message_parser_ctx *parser; @@ -503,9 +506,7 @@ fts_build_mail_real(struct fts_backend_update_context *update_ctx, ctx.pending_input = buffer_create_dynamic(default_pool, 128); prev_part = NULL; - parser = message_parser_init(pool_datastack_create(), input, - MESSAGE_HEADER_PARSER_FLAG_CLEAN_ONELINE, - 0); + parser = message_parser_init(pool_datastack_create(), input, &parser_set); decoder = message_decoder_init(update_ctx->normalizer, 0); for (;;) { -- 2.11.0 From e6fe2ef5d5a8620308d42a7ed1b35e907c645f72 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 16:59:40 +0300 Subject: [PATCH 14/15] lib-mail: message-parser - Support limiting max number of nested MIME parts The default is to allow 100 nested MIME parts. When the limit is reached, the innermost MIME part's body contains all the rest of the inner bodies until a parent MIME part is reached. 
--- src/lib-mail/message-parser-private.h | 2 + src/lib-mail/message-parser.c | 39 ++++++-- src/lib-mail/message-parser.h | 6 ++ src/lib-mail/test-message-parser.c | 163 ++++++++++++++++++++++++++++++++++ 4 files changed, 203 insertions(+), 7 deletions(-) diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index dbf8464cfb..4bb0c3dbfd 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -23,9 +23,11 @@ struct message_parser_ctx { struct istream *input; struct message_part *parts, *part; const char *broken_reason; + unsigned int nested_parts_count; enum message_header_parser_flags hdr_flags; enum message_parser_flags flags; + unsigned int max_nested_mime_parts; char *last_boundary; struct message_boundary *boundaries; diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 41b9ed133a..ea0154d5ed 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -157,12 +157,17 @@ message_part_append(struct message_parser_ctx *ctx) ctx->next_part = &part->children; ctx->part = part; + ctx->nested_parts_count++; + i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts); } static void message_part_finish(struct message_parser_ctx *ctx) { struct message_part **const *parent_next_partp; + i_assert(ctx->nested_parts_count > 0); + ctx->nested_parts_count--; + parent_next_partp = array_back(&ctx->next_part_stack); array_pop_back(&ctx->next_part_stack); ctx->next_part = *parent_next_partp; @@ -542,6 +547,11 @@ static bool block_is_at_eoh(const struct message_block *block) return FALSE; } +static bool parse_too_many_nested_mime_parts(struct message_parser_ctx *ctx) +{ + return ctx->nested_parts_count+1 >= ctx->max_nested_mime_parts; +} + #define MUTEX_FLAGS \ (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_MULTIPART) @@ -566,8 +576,12 @@ static int parse_next_header(struct message_parser_ctx *ctx, "\n--boundary" belongs to us or to a 
previous boundary. this is a problem if the boundary prefixes are identical, because MIME requires only the prefix to match. */ - parse_next_body_multipart_init(ctx); - ctx->multipart = TRUE; + if (!parse_too_many_nested_mime_parts(ctx)) { + parse_next_body_multipart_init(ctx); + ctx->multipart = TRUE; + } else { + part->flags &= ~MESSAGE_PART_FLAG_MULTIPART; + } } /* before parsing the header see if we can find a --boundary from here. @@ -671,12 +685,16 @@ static int parse_next_header(struct message_parser_ctx *ctx, i_assert(ctx->last_boundary == NULL); ctx->multipart = FALSE; ctx->parse_next_block = parse_next_body_to_boundary; - } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0) + } else if ((part->flags & MESSAGE_PART_FLAG_MESSAGE_RFC822) != 0 && + !parse_too_many_nested_mime_parts(ctx)) { ctx->parse_next_block = parse_next_body_message_rfc822_init; - else if (ctx->boundaries != NULL) - ctx->parse_next_block = parse_next_body_to_boundary; - else - ctx->parse_next_block = parse_next_body_to_eof; + } else { + part->flags &= ~MESSAGE_PART_FLAG_MESSAGE_RFC822; + if (ctx->boundaries != NULL) + ctx->parse_next_block = parse_next_body_to_boundary; + else + ctx->parse_next_block = parse_next_body_to_eof; + } ctx->want_count = 1; @@ -710,6 +728,9 @@ message_parser_init_int(struct istream *input, ctx = i_new(struct message_parser_ctx, 1); ctx->hdr_flags = set->hdr_flags; ctx->flags = set->flags; + ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ? 
+ set->max_nested_mime_parts : + MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS; ctx->input = input; i_stream_ref(input); return ctx; @@ -754,6 +775,10 @@ int message_parser_deinit_from_parts(struct message_parser_ctx **_ctx, if (ctx->hdr_parser_ctx != NULL) message_parse_header_deinit(&ctx->hdr_parser_ctx); boundary_remove_until(ctx, NULL); + /* caller might have stopped the parsing early */ + i_assert(ctx->nested_parts_count == 0 || + i_stream_have_bytes_left(ctx->input)); + i_stream_unref(&ctx->input); array_free(&ctx->next_part_stack); i_free(ctx->last_boundary); diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h index d159b2607d..7f6ea04936 100644 --- a/src/lib-mail/message-parser.h +++ b/src/lib-mail/message-parser.h @@ -17,9 +17,15 @@ enum message_parser_flags { MESSAGE_PARSER_FLAG_INCLUDE_BOUNDARIES = 0x08 }; +#define MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS 100 + struct message_parser_settings { enum message_header_parser_flags hdr_flags; enum message_parser_flags flags; + + /* Maximum nested MIME parts. + 0 = MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS. 
*/ + unsigned int max_nested_mime_parts; }; struct message_parser_ctx; diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index 5e496275fe..df2586eddd 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -175,6 +175,36 @@ static void test_message_parser_small_blocks(void) test_end(); } +static void test_message_parser_stop_early(void) +{ + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts; + struct message_block block; + unsigned int i; + pool_t pool; + int ret; + + test_begin("message parser in stop early"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(test_msg); + + test_istream_set_allow_eof(input, FALSE); + for (i = 1; i <= TEST_MSG_LEN+1; i++) { + i_stream_seek(input, 0); + test_istream_set_size(input, i); + parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, + &block)) > 0) ; + test_assert(ret == 0); + message_parser_deinit(&parser, &parts); + } + + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + static void test_message_parser_truncated_mime_headers(void) { static const char input_msg[] = @@ -839,10 +869,141 @@ static const char input_msg[] = test_end(); } +static void test_message_parser_mime_part_nested_limit(void) +{ +static const char input_msg[] = +"Content-Type: multipart/mixed; boundary=\"1\"\n" +"\n" +"--1\n" +"Content-Type: multipart/mixed; boundary=\"2\"\n" +"\n" +"--2\n" +"Content-Type: text/plain\n" +"\n" +"1\n" +"--2\n" +"Content-Type: text/plain\n" +"\n" +"22\n" +"--1\n" +"Content-Type: text/plain\n" +"\n" +"333\n"; + const struct message_parser_settings parser_set = { + .max_nested_mime_parts = 2, + }; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts, *part; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser mime part nested limit"); + pool = 
pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, &parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + part = parts; + test_assert(part->children_count == 2); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 45); + test_assert(part->header_size.virtual_size == 45+2); + test_assert(part->body_size.lines == 15); + test_assert(part->body_size.physical_size == 148); + test_assert(part->body_size.virtual_size == 148+15); + + part = parts->children; + test_assert(part->children_count == 0); + test_assert(part->flags == MESSAGE_PART_FLAG_IS_MIME); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 45); + test_assert(part->header_size.virtual_size == 45+2); + test_assert(part->body_size.lines == 7); + test_assert(part->body_size.physical_size == 64); + test_assert(part->body_size.virtual_size == 64+7); + + part = parts->children->next; + test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 26); + test_assert(part->header_size.virtual_size == 26+2); + test_assert(part->body_size.lines == 1); + test_assert(part->body_size.physical_size == 4); + test_assert(part->body_size.virtual_size == 4+1); + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + +static void test_message_parser_mime_part_nested_limit_rfc822(void) +{ +static const char input_msg[] = +"Content-Type: message/rfc822\n" +"\n" +"Content-Type: message/rfc822\n" +"\n" +"Content-Type: text/plain\n" +"\n" +"1\n"; + const struct 
message_parser_settings parser_set = { + .max_nested_mime_parts = 2, + }; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts, *part; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser mime part nested limit rfc822"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, &parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + part = parts; + test_assert(part->children_count == 1); + test_assert(part->flags == (MESSAGE_PART_FLAG_MESSAGE_RFC822 | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 30); + test_assert(part->header_size.virtual_size == 30+2); + test_assert(part->body_size.lines == 5); + test_assert(part->body_size.physical_size == 58); + test_assert(part->body_size.virtual_size == 58+5); + + part = parts->children; + test_assert(part->children_count == 0); + test_assert(part->flags == MESSAGE_PART_FLAG_IS_MIME); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 30); + test_assert(part->header_size.virtual_size == 30+2); + test_assert(part->body_size.lines == 3); + test_assert(part->body_size.physical_size == 28); + test_assert(part->body_size.virtual_size == 28+3); + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + int main(void) { static void (*const test_functions[])(void) = { test_message_parser_small_blocks, + test_message_parser_stop_early, test_message_parser_truncated_mime_headers, test_message_parser_truncated_mime_headers2, test_message_parser_truncated_mime_headers3, @@ -854,6 +1015,8 @@ int main(void) test_message_parser_continuing_mime_boundary_reverse, test_message_parser_long_mime_boundary, 
test_message_parser_no_eoh, + test_message_parser_mime_part_nested_limit, + test_message_parser_mime_part_nested_limit_rfc822, NULL }; return test_run(test_functions); -- 2.11.0 From 39e1ba38c2b3a5e3e567a35b7f6c414c7ed43769 Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 23 Apr 2020 17:09:33 +0300 Subject: [PATCH 15/15] lib-mail: message-parser - Support limiting max number of MIME parts The default is to allow 10000 MIME parts. When it's reached, no more MIME boundary lines will be recognized, so the rest of the mail belongs to the last added MIME part. --- src/lib-mail/message-parser-private.h | 2 + src/lib-mail/message-parser.c | 12 +++++ src/lib-mail/message-parser.h | 4 ++ src/lib-mail/test-message-parser.c | 86 +++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+) diff --git a/src/lib-mail/message-parser-private.h b/src/lib-mail/message-parser-private.h index 4bb0c3dbfd..1f9c66b827 100644 --- a/src/lib-mail/message-parser-private.h +++ b/src/lib-mail/message-parser-private.h @@ -24,10 +24,12 @@ struct message_parser_ctx { struct message_part *parts, *part; const char *broken_reason; unsigned int nested_parts_count; + unsigned int total_parts_count; enum message_header_parser_flags hdr_flags; enum message_parser_flags flags; unsigned int max_nested_mime_parts; + unsigned int max_total_mime_parts; char *last_boundary; struct message_boundary *boundaries; diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index ea0154d5ed..6370a1bff7 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -158,7 +158,9 @@ message_part_append(struct message_parser_ctx *ctx) ctx->part = part; ctx->nested_parts_count++; + ctx->total_parts_count++; i_assert(ctx->nested_parts_count < ctx->max_nested_mime_parts); + i_assert(ctx->total_parts_count <= ctx->max_total_mime_parts); } static void message_part_finish(struct message_parser_ctx *ctx) @@ -241,6 +243,12 @@ boundary_line_find(struct message_parser_ctx 
*ctx, return -1; } + if (ctx->total_parts_count >= ctx->max_total_mime_parts) { + /* can't add any more MIME parts. just stop trying to find + more boundaries. */ + return -1; + } + /* need to find the end of line */ data += 2; size -= 2; @@ -731,6 +739,9 @@ message_parser_init_int(struct istream *input, ctx->max_nested_mime_parts = set->max_nested_mime_parts != 0 ? set->max_nested_mime_parts : MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS; + ctx->max_total_mime_parts = set->max_total_mime_parts != 0 ? + set->max_total_mime_parts : + MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS; ctx->input = input; i_stream_ref(input); return ctx; @@ -747,6 +758,7 @@ message_parser_init(pool_t part_pool, struct istream *input, ctx->parts = ctx->part = p_new(part_pool, struct message_part, 1); ctx->next_part = &ctx->part->children; ctx->parse_next_block = parse_next_header_init; + ctx->total_parts_count = 1; i_array_init(&ctx->next_part_stack, 4); return ctx; } diff --git a/src/lib-mail/message-parser.h b/src/lib-mail/message-parser.h index 7f6ea04936..f19e526284 100644 --- a/src/lib-mail/message-parser.h +++ b/src/lib-mail/message-parser.h @@ -18,6 +18,7 @@ enum message_parser_flags { }; #define MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS 100 +#define MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS 10000 struct message_parser_settings { enum message_header_parser_flags hdr_flags; @@ -26,6 +27,9 @@ struct message_parser_settings { /* Maximum nested MIME parts. 0 = MESSAGE_PARSER_DEFAULT_MAX_NESTED_MIME_PARTS. */ unsigned int max_nested_mime_parts; + /* Maximum MIME parts in total. + 0 = MESSAGE_PARSER_DEFAULT_MAX_TOTAL_MIME_PARTS. 
*/ + unsigned int max_total_mime_parts; }; struct message_parser_ctx; diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index df2586eddd..0422b42265 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -999,6 +999,91 @@ static const char input_msg[] = test_end(); } +static void test_message_parser_mime_part_limit(void) +{ +static const char input_msg[] = +"Content-Type: multipart/mixed; boundary=\"1\"\n" +"\n" +"--1\n" +"Content-Type: multipart/mixed; boundary=\"2\"\n" +"\n" +"--2\n" +"Content-Type: text/plain\n" +"\n" +"1\n" +"--2\n" +"Content-Type: text/plain\n" +"\n" +"22\n" +"--1\n" +"Content-Type: text/plain\n" +"\n" +"333\n"; + const struct message_parser_settings parser_set = { + .max_total_mime_parts = 4, + }; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts, *part; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser mime part limit"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, &parser_set); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + part = parts; + test_assert(part->children_count == 3); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 45); + test_assert(part->header_size.virtual_size == 45+2); + test_assert(part->body_size.lines == 15); + test_assert(part->body_size.physical_size == 148); + test_assert(part->body_size.virtual_size == 148+15); + + part = parts->children; + test_assert(part->children_count == 2); + test_assert(part->flags == (MESSAGE_PART_FLAG_MULTIPART | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + 
test_assert(part->header_size.physical_size == 45); + test_assert(part->header_size.virtual_size == 45+2); + test_assert(part->body_size.lines == 12); + test_assert(part->body_size.physical_size == 99); + test_assert(part->body_size.virtual_size == 99+12); + + part = parts->children->children; + test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 26); + test_assert(part->header_size.virtual_size == 26+2); + test_assert(part->body_size.lines == 0); + test_assert(part->body_size.physical_size == 1); + test_assert(part->body_size.virtual_size == 1); + + part = parts->children->children->next; + test_assert(part->children_count == 0); + test_assert(part->flags == (MESSAGE_PART_FLAG_TEXT | MESSAGE_PART_FLAG_IS_MIME)); + test_assert(part->header_size.lines == 2); + test_assert(part->header_size.physical_size == 26); + test_assert(part->header_size.virtual_size == 26+2); + test_assert(part->body_size.lines == 5); + test_assert(part->body_size.physical_size == 37); + test_assert(part->body_size.virtual_size == 37+5); + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + int main(void) { static void (*const test_functions[])(void) = { @@ -1017,6 +1102,7 @@ int main(void) test_message_parser_no_eoh, test_message_parser_mime_part_nested_limit, test_message_parser_mime_part_nested_limit_rfc822, + test_message_parser_mime_part_limit, NULL }; return test_run(test_functions); -- 2.11.0 From 6001e4b48c5a23735eb9c4ca9a187a175fd1a1da Mon Sep 17 00:00:00 2001 From: Timo Sirainen Date: Thu, 2 Jul 2020 17:31:19 +0300 Subject: [PATCH] lib-mail: Fix handling trailing "--" in MIME boundaries Broken by 5b8ec27fae941d06516c30476dcf4820c6d200ab --- src/lib-mail/message-parser.c | 14 ++++++++---- src/lib-mail/test-message-parser.c | 46 ++++++++++++++++++++++++++++++++++++++ 2 files 
changed, 56 insertions(+), 4 deletions(-) diff --git a/src/lib-mail/message-parser.c b/src/lib-mail/message-parser.c index 6370a1bff7..011dea9050 100644 --- a/src/lib-mail/message-parser.c +++ b/src/lib-mail/message-parser.c @@ -19,7 +19,7 @@ static int parse_next_body_to_eof(struct message_parser_ctx *ctx, static struct message_boundary * boundary_find(struct message_boundary *boundaries, - const unsigned char *data, size_t len) + const unsigned char *data, size_t len, bool trailing_dashes) { struct message_boundary *best = NULL; @@ -33,7 +33,11 @@ boundary_find(struct message_boundary *boundaries, memcmp(boundaries->boundary, data, boundaries->len) == 0 && (best == NULL || best->len < boundaries->len)) { best = boundaries; - if (best->len == len) { + /* If we see "foo--", it could either mean that there + is a boundary named "foo" that ends now or there's + a boundary "foo--" which continues. */ + if (best->len == len || + (best->len == len-2 && trailing_dashes)) { /* This is exactly the wanted boundary. There can't be a better one. 
*/ break; @@ -261,6 +265,7 @@ boundary_line_find(struct message_parser_ctx *ctx, return 0; } size_t find_size = size; + bool trailing_dashes = FALSE; if (lf_pos != NULL) { find_size = lf_pos - data; @@ -268,11 +273,12 @@ boundary_line_find(struct message_parser_ctx *ctx, find_size--; if (find_size > 2 && data[find_size-1] == '-' && data[find_size-2] == '-') - find_size -= 2; + trailing_dashes = TRUE; } else if (find_size > BOUNDARY_END_MAX_LEN) find_size = BOUNDARY_END_MAX_LEN; - *boundary_r = boundary_find(ctx->boundaries, data, find_size); + *boundary_r = boundary_find(ctx->boundaries, data, find_size, + trailing_dashes); if (*boundary_r == NULL) return -1; diff --git a/src/lib-mail/test-message-parser.c b/src/lib-mail/test-message-parser.c index 1b782f5df2..641edec4be 100644 --- a/src/lib-mail/test-message-parser.c +++ b/src/lib-mail/test-message-parser.c @@ -532,6 +532,51 @@ static const char input_msg[] = test_end(); } +static void test_message_parser_trailing_dashes(void) +{ +static const char input_msg[] = +"Content-Type: multipart/mixed; boundary=\"a--\"\n" +"\n" +"--a--\n" +"Content-Type: multipart/mixed; boundary=\"a----\"\n" +"\n" +"--a----\n" +"Content-Type: text/plain\n" +"\n" +"body\n" +"--a------\n" +"Content-Type: text/html\n" +"\n" +"body2\n" +"--a----"; + struct message_parser_ctx *parser; + struct istream *input; + struct message_part *parts; + struct message_block block; + pool_t pool; + int ret; + + test_begin("message parser trailing dashes"); + pool = pool_alloconly_create("message parser", 10240); + input = test_istream_create(input_msg); + + parser = message_parser_init(pool, input, &set_empty); + while ((ret = message_parser_parse_next_block(parser, &block)) > 0) ; + test_assert(ret < 0); + message_parser_deinit(&parser, &parts); + + test_assert(parts->children_count == 2); + test_assert(parts->children->next == NULL); + test_assert(parts->children->children_count == 1); + test_assert(parts->children->children->next == NULL); + 
test_assert(parts->children->children->children_count == 0); + + test_parsed_parts(input, parts); + i_stream_unref(&input); + pool_unref(&pool); + test_end(); +} + static void test_message_parser_continuing_mime_boundary(void) { static const char input_msg[] = @@ -1095,6 +1140,7 @@ int main(void) test_message_parser_empty_multipart, test_message_parser_duplicate_mime_boundary, test_message_parser_garbage_suffix_mime_boundary, + test_message_parser_trailing_dashes, test_message_parser_continuing_mime_boundary, test_message_parser_continuing_truncated_mime_boundary, test_message_parser_continuing_mime_boundary_reverse, -- 2.11.0