|
David Bremner-2 |
|
|
I haven't written user docs yet. I'm considering whether I can improve
this small task by doing the larger task of splitting the man page and having notmuch help call man. The code is cleaned up quite a bit, the bug fix for 8bit characters is rolled into the series, and following a suggestion of Jani, restore now auto-detects what format the input is in. I left the default dump format as "sup" (old style) in this round. I'm leery of breaking people's scripts, so my current proposal is to deprecate the sup format (as default) for a release or two. _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
From: David Bremner <[hidden email]>
The character set is chosen to be suitable for pathnames, and the same as that used by contrib/nmbug --- util/Makefile.local | 2 +- util/hex-escape.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++ util/hex-escape.h | 32 +++++++++++ 3 files changed, 189 insertions(+), 1 deletions(-) create mode 100644 util/hex-escape.c create mode 100644 util/hex-escape.h diff --git a/util/Makefile.local b/util/Makefile.local index 26e4c3f..2e63932 100644 --- a/util/Makefile.local +++ b/util/Makefile.local @@ -3,7 +3,7 @@ dir := util extra_cflags += -I$(srcdir)/$(dir) -libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c +libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c libutil_modules := $(libutil_c_srcs:.c=.o) diff --git a/util/hex-escape.c b/util/hex-escape.c new file mode 100644 index 0000000..f58b9a2 --- /dev/null +++ b/util/hex-escape.c @@ -0,0 +1,156 @@ +/* pathname.c - Manage encoding and decoding of byte strings into path names + * + * Copyright (c) 2011 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . 
+ * + * Author: David Bremner <[hidden email]> + */ + +#include <assert.h> +#include <string.h> +#include <talloc.h> +#include "error_util.h" +#include "hex-escape.h" + +static const size_t default_buf_size = 1024; + +static const char *output_charset = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,"; + +static const int escape_char = '%'; + +static int +is_output (char c) +{ + return (strchr (output_charset, c) != NULL); +} + +static int +maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size) +{ + if (*out_size < needed) { + + if (*out == NULL) + *out = talloc_size (ctx, needed); + else + *out = talloc_realloc (ctx, *out, char, needed); + + if (*out == NULL) + return 0; + + *out_size = needed; + } + return 1; +} + +hex_status_t +hex_encode (void *ctx, const char *in, char **out, size_t *out_size) +{ + + const unsigned char *p; + char *q; + + size_t escape_count = 0; + size_t len = 0; + size_t needed; + + assert (ctx); assert (in); assert (out); assert (out_size); + + for (p = (unsigned char *) in; *p; p++) { + escape_count += (!is_output (*p)); + len++; + } + + needed = len + escape_count * 2 + 1; + + if (*out == NULL) + *out_size = 0; + + if (!maybe_realloc (ctx, needed, out, out_size)) + return HEX_OUT_OF_MEMORY; + + q = *out; + p = (unsigned char *) in; + + while (*p) { + if (is_output (*p)) { + *q++ = *p++; + } else { + sprintf (q, "%%%02x", *p++); + q += 3; + } + } + + *q = '\0'; + return HEX_SUCCESS; +} + +hex_status_t +hex_decode (void *ctx, const char *in, char **out, size_t * out_size) +{ + + char buf[3]; + + const char *p; + unsigned char *q; + + size_t escape_count = 0; + size_t needed = 0; + + assert (ctx); assert (in); assert (out); assert (out_size); + + size_t len = strlen (in); + + for (p = in; *p; p++) + escape_count += (*p == escape_char); + + needed = len - escape_count * 2 + 1; + + if (!maybe_realloc (ctx, needed, out, out_size)) + return HEX_OUT_OF_MEMORY; + + p = in; + q = (unsigned char *) *out; + 
buf[2] = 0; + + while (*p) { + + if (*p == escape_char) { + + char *endp; + + if (len < 3) + return HEX_SYNTAX_ERROR; + + buf[0] = p[1]; + buf[1] = p[2]; + + *q = strtol (buf, &endp, 16); + + if (endp != buf + 2) + return HEX_SYNTAX_ERROR; + + len -= 3; + p += 3; + q++; + } else { + *q++ = *p++; + } + } + + *q = '\0'; + + return HEX_SUCCESS; +} diff --git a/util/hex-escape.h b/util/hex-escape.h new file mode 100644 index 0000000..e409626 --- /dev/null +++ b/util/hex-escape.h @@ -0,0 +1,32 @@ +#ifndef _HEX_ESCAPE_H +#define _HEX_ESCAPE_H + +typedef enum hex_status { + HEX_SUCCESS = 0, + HEX_SYNTAX_ERROR, + HEX_OUT_OF_MEMORY +} hex_status_t; + +/* + * The API is modelled on that for getline. + * + * If 'out' points to a NULL pointer a char array of the appropriate + * size is allocated using talloc, and out_size is updated. + * + * If 'out' points to a non-NULL pointer, it assumed to describe an + * existing char array, with the size given in *out_size. This array + * may be resized by talloc_realloc if needed; in this case *out_size + * will also be updated. + * + * Note that it is an error to pass a NULL pointer for any parameter + * of these routines. + */ + +hex_status_t +hex_encode (void *talloc_ctx, const char *in, char **out, + size_t *out_size); + +hex_status_t +hex_decode (void *talloc_ctx, const char *in, char **out, + size_t *out_size); +#endif -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
This program is used both as a test-bed/unit-tester for ../util/hex-escape.c, and also as a utility in future tests of dump and restore. --- test/.gitignore | 1 + test/Makefile.local | 6 +++- test/basic | 2 +- test/hex-xcode.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 test/hex-xcode.c diff --git a/test/.gitignore b/test/.gitignore index e63c689..be7ab5e 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -3,4 +3,5 @@ corpus.mail smtp-dummy symbol-test arg-test +hex-xcode tmp.* diff --git a/test/Makefile.local b/test/Makefile.local index fa2df73..ec507c4 100644 --- a/test/Makefile.local +++ b/test/Makefile.local @@ -13,6 +13,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o) $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a $(call quiet,CC) -I. $^ -o $@ +$(dir)/hex-xcode: $(dir)/hex-xcode.o util/libutil.a + $(call quiet,CC) -I. $^ -o $@ -ltalloc + $(dir)/smtp-dummy: $(smtp_dummy_modules) $(call quiet,CC) $^ -o $@ @@ -21,7 +24,8 @@ $(dir)/symbol-test: $(dir)/symbol-test.o .PHONY: test check -test-binaries: $(dir)/arg-test $(dir)/smtp-dummy $(dir)/symbol-test +test-binaries: $(dir)/arg-test $(dir)/hex-xcode \ + $(dir)/smtp-dummy $(dir)/symbol-test test: all test-binaries @${dir}/notmuch-test $(OPTIONS) diff --git a/test/basic b/test/basic index d6aed24..af57026 100755 --- a/test/basic +++ b/test/basic @@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test' eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test) tests_in_suite=$(for i in $TESTS; do echo $i; done | sort) available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \ - sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test)$/d" | \ + sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \ sort) test_expect_equal 
"$tests_in_suite" "$available" diff --git a/test/hex-xcode.c b/test/hex-xcode.c new file mode 100644 index 0000000..20c1603 --- /dev/null +++ b/test/hex-xcode.c @@ -0,0 +1,78 @@ +/* No, nothing to to with IDE from Apple Inc. + testbed for ../util/hex-escape.c. + + usage: hex-xcode (e|d) < foo + + e is for encode + d is for decode + */ + +#include "notmuch-client.h" +#include "hex-escape.h" +#include <assert.h> + +static int +xcode (void *ctx, char dir, char *in, char **buf_p, size_t *size_p) +{ + hex_status_t status; + + if (dir == 'e') + status = hex_encode (ctx, in, buf_p, size_p); + else + status = hex_decode (ctx, in, buf_p, size_p); + + if (status == HEX_SUCCESS) + puts (*buf_p); + + return status; +} + + +int +main (int argc, char **argv) +{ + + assert (argc > 1 && argv[1]); + + char dir = argv[1][0]; + + void *ctx = talloc_new (NULL); + + char *line = NULL; + size_t line_size; + ssize_t line_len; + + char *buffer = NULL; + size_t buf_size = 0; + + int arg_index = 2; + notmuch_bool_t read_stdin = TRUE; + + for (arg_index = 2; arg_index < argc; arg_index++) { + + if (xcode (ctx, dir, argv[arg_index], + &buffer, &buf_size) != HEX_SUCCESS) + return 1; + + read_stdin = FALSE; + } + + if (!read_stdin) + return 0; + + while ((line_len = getline (&line, &line_size, stdin)) != -1) { + + chomp_newline (line); + + if (xcode (ctx, dir, line, &buffer, &buf_size) != HEX_SUCCESS) + return 1; + + } + + if (line) + free (line); + + talloc_free (ctx); + + return 0; +} -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
These are more like unit tests, to (try to) make sure the library functionality is working before building more complicated things on top of it. --- test/hex-escaping | 26 ++++++++++++++++++++++++++ test/notmuch-test | 1 + 2 files changed, 27 insertions(+), 0 deletions(-) create mode 100755 test/hex-escaping diff --git a/test/hex-escaping b/test/hex-escaping new file mode 100755 index 0000000..daa6446 --- /dev/null +++ b/test/hex-escaping @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +test_description="hex encoding and decoding" +. ./test-lib.sh + +test_begin_subtest "round trip" +find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED +$TEST_DIRECTORY/hex-xcode e < EXPECTED | $TEST_DIRECTORY/hex-xcode d > OUTPUT +test_expect_equal_file OUTPUT EXPECTED + +test_begin_subtest "punctuation" +tag1='comic_swear=$&^%$^%\\//-+$^%$' +tag_enc1=$($TEST_DIRECTORY/hex-xcode e "$tag1") +test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24" + +test_begin_subtest "round trip newlines" +printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count +$TEST_DIRECTORY/hex-xcode e < EXPECTED.$test_count |\ + $TEST_DIRECTORY/hex-xcode d > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest "round trip 8bit chars" +echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count +$TEST_DIRECTORY/hex-xcode d < EXPECTED.$test_count |\ + $TEST_DIRECTORY/hex-xcode e > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count +test_done diff --git a/test/notmuch-test b/test/notmuch-test index ded79e8..af72c73 100755 --- a/test/notmuch-test +++ b/test/notmuch-test @@ -50,6 +50,7 @@ TESTS=" python hooks argument-parsing + hex-escaping " TESTS=${NOTMUCH_TESTS:=$TESTS} -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
sup is the old format, and remains the default. Each line of the notmuch format is "msg_id tag tag...tag" where each space seperated token is 'hex-encoded' to remove troubling characters. In particular this format won't have the same problem with e.g. spaces in message-ids or tags; they will be round-trip-able. --- dump-restore-private.h | 12 ++++++++++++ notmuch-dump.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 dump-restore-private.h diff --git a/dump-restore-private.h b/dump-restore-private.h new file mode 100644 index 0000000..34a5022 --- /dev/null +++ b/dump-restore-private.h @@ -0,0 +1,12 @@ +#ifndef DUMP_RESTORE_PRIVATE_H +#define DUMP_RESTORE_PRIVATE_H + +#include "hex-escape.h" +#include "command-line-arguments.h" + +typedef enum dump_formats { + DUMP_FORMAT_SUP, + DUMP_FORMAT_NOTMUCH +} dump_format_t; + +#endif diff --git a/notmuch-dump.c b/notmuch-dump.c index a735875..0231db2 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -19,6 +19,7 @@ */ #include "notmuch-client.h" +#include "dump-restore-private.h" int notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) @@ -44,9 +45,15 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) char *output_file_name = NULL; int opt_index; + int output_format = DUMP_FORMAT_SUP; + notmuch_opt_desc_t options[] = { - { NOTMUCH_OPT_POSITION, &output_file_name, 0, 0, 0 }, - { 0, 0, 0, 0, 0 } + { NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f', + (notmuch_keyword_t []){ { "sup", DUMP_FORMAT_SUP }, + { "notmuch", DUMP_FORMAT_NOTMUCH }, + {0, 0} } }, + { NOTMUCH_OPT_POSITION, &output_file_name, 0, 0, 0 }, + { 0, 0, 0, 0, 0 } }; opt_index = parse_arguments (argc, argv, options, 1); @@ -85,29 +92,53 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) */ notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED); + char *buffer = NULL; + size_t buffer_size = 0; + for (messages = notmuch_query_search_messages 
(query); notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) { int first = 1; - message = notmuch_messages_get (messages); + const char *message_id; - fprintf (output, - "%s (", notmuch_message_get_message_id (message)); + message = notmuch_messages_get (messages); + message_id = notmuch_message_get_message_id (message); + + if (output_format == DUMP_FORMAT_SUP) { + fprintf (output, "%s (", message_id); + } else { + if (hex_encode (notmuch, message_id, + &buffer, &buffer_size) != HEX_SUCCESS) + return 1; + fprintf (output, "%s ", buffer); + } for (tags = notmuch_message_get_tags (message); notmuch_tags_valid (tags); notmuch_tags_move_to_next (tags)) { + const char *tag_str = notmuch_tags_get (tags); + if (! first) - fprintf (output, " "); + fputs (" ", output); - fprintf (output, "%s", notmuch_tags_get (tags)); + if (output_format == DUMP_FORMAT_SUP) { + fputs (tag_str, output); + } else { + if (hex_encode (notmuch, tag_str, + &buffer, &buffer_size) != HEX_SUCCESS) + return 1; + fputs (buffer, output); + } first = 0; } - fprintf (output, ")\n"); + if (output_format == DUMP_FORMAT_SUP) + fputs (")\n", output); + else + fputs ("\n", output); notmuch_message_destroy (message); } -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
The first test is really to test our assumptions about the corpus, namely that a certain set of message-id's is safe (i.e. doesn't change under hex-escaping). We then check dump output as best we can without functionality-to-come in notmuch-restore. --- test/dump-restore | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/test/dump-restore b/test/dump-restore index 439e998..48caf4e 100755 --- a/test/dump-restore +++ b/test/dump-restore @@ -82,4 +82,16 @@ test_begin_subtest "dump outfile -- from:cworth" notmuch dump dump-outfile-dash-inbox.actual -- from:cworth test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual +test_begin_subtest "Check for a safe set of message-ids" +notmuch search --output=messages from:cworth > EXPECTED.$test_count +notmuch search --output=messages from:cworth |\ + $TEST_DIRECTORY/hex-xcode e > OUTPUT.$test_count +test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count + +# we have observed that cworth has sane message-ids, and hopefully sane tags. +test_begin_subtest "dump --format=notmuch -- from:cworth" +notmuch dump --format=sup -- from:cworth | tr -d \(\) > EXPECTED.$test_count +notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count +test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count + test_done -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
This is format is whitespace separated tokens, encoded by util/hex-escape.c --- notmuch-restore.c | 81 +++++++++++++++++++++++++++++++++++++++++------------ 1 files changed, 63 insertions(+), 18 deletions(-) diff --git a/notmuch-restore.c b/notmuch-restore.c index 87d9772..f618b47 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -19,6 +19,7 @@ */ #include "notmuch-client.h" +#include "dump-restore-private.h" int notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) @@ -35,6 +36,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) regex_t regex; int rerr; int opt_index; + int input_format = DUMP_FORMAT_SUP; config = notmuch_config_open (ctx, NULL, NULL); if (config == NULL) @@ -48,6 +50,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config); notmuch_opt_desc_t options[] = { + FORMAT_DESC (input_format), { NOTMUCH_OPT_POSITION, &input_file_name, 0, 0, 0 }, { NOTMUCH_OPT_BOOLEAN, &accumulate, "accumulate", 'a', 0 }, { 0, 0, 0, 0, 0 } @@ -81,33 +84,63 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) * non-space characters for the message-id, then one or more * spaces, then a list of space-separated tags as a sequence of * characters within literal '(' and ')'. 
*/ - if ( xregcomp (&regex, - "^([^ ]+) \\(([^)]*)\\)$", - REG_EXTENDED) ) - INTERNAL_ERROR("compile time constant regex failed."); + if (input_format == DUMP_FORMAT_SUP) { + if ( xregcomp (&regex, + "^([^ ]+) \\(([^)]*)\\)$", + REG_EXTENDED) ) + INTERNAL_ERROR("compile time constant regex failed."); + } + + + /* These are out here to re-use the buffers with hex_decode */ + + char *message_id = NULL; + size_t message_id_size = 0; + char *tag = NULL; + size_t tag_size = 0; while ((line_len = getline (&line, &line_size, input)) != -1) { regmatch_t match[3]; - char *message_id, *file_tags, *tag, *next; + char *file_tags, *next; notmuch_message_t *message = NULL; + notmuch_status_t status; notmuch_tags_t *db_tags; char *db_tags_str; chomp_newline (line); - rerr = xregexec (&regex, line, 3, match, 0); - if (rerr == REG_NOMATCH) - { - fprintf (stderr, "Warning: Ignoring invalid input line: %s\n", - line); + /* Silently ignore blank lines */ + + if (line[0] == '\0') { continue; } - message_id = xstrndup (line + match[1].rm_so, - match[1].rm_eo - match[1].rm_so); - file_tags = xstrndup (line + match[2].rm_so, - match[2].rm_eo - match[2].rm_so); + if (input_format == DUMP_FORMAT_SUP) { + rerr = xregexec (&regex, line, 3, match, 0); + if (rerr == REG_NOMATCH) + { + fprintf (stderr, "Warning: Ignoring invalid input line: %s\n", + line); + continue; + } + message_id = talloc_strndup (notmuch, line + match[1].rm_so, + match[1].rm_eo - match[1].rm_so); + file_tags = talloc_strndup (notmuch, line + match[2].rm_so, + match[2].rm_eo - match[2].rm_so); + } else { + char *p = line; + char *raw_mid; + + raw_mid = strsep (&p, " \t"); + + if (hex_decode (notmuch, raw_mid, + &message_id, &message_id_size) != HEX_SUCCESS) + return 1; + + file_tags = xstrdup (p); + } + status = notmuch_database_find_message (notmuch, message_id, &message); if (status || message == NULL) { @@ -153,7 +186,16 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) next = file_tags; while (next) { - tag = 
strsep (&next, " "); + char *raw_tag = strsep (&next, " "); + + if (input_format == DUMP_FORMAT_NOTMUCH) { + if (hex_decode (notmuch, raw_tag, + &tag, &tag_size) != HEX_SUCCESS) + return 1; + } else { + tag = talloc_strdup (notmuch, raw_tag); + } + if (*tag == '\0') continue; status = notmuch_message_add_tag (message, tag); @@ -175,11 +217,14 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) if (message) notmuch_message_destroy (message); message = NULL; - free (message_id); - free (file_tags); + if (input_format == DUMP_FORMAT_SUP) { + talloc_free (message_id); + talloc_free (file_tags); + } } - regfree (&regex); + if (input_format == DUMP_FORMAT_SUP) + regfree (&regex); if (line) free (line); -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
These one need the completed functionality in notmuch-restore. Fairly exotic tags are tested, but no weird message id's. --- test/dump-restore | 36 ++++++++++++++++++++++++++++++++++++ 1 files changed, 36 insertions(+), 0 deletions(-) diff --git a/test/dump-restore b/test/dump-restore index 48caf4e..ef4a63e 100755 --- a/test/dump-restore +++ b/test/dump-restore @@ -94,4 +94,40 @@ notmuch dump --format=sup -- from:cworth | tr -d \(\) > EXPECTED.$test_count notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count +test_begin_subtest "format=notmuch, # round-trip" +notmuch dump --format=sup | sort > EXPECTED.$test_count +notmuch dump --format=notmuch | notmuch restore --format=notmuch +notmuch dump --format=sup | sort > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +tag1='comic_swear=$&^%$^%\\//-+$^%$' +enc1=$($TEST_DIRECTORY/hex-xcode e "$tag1") + +tag2=$(printf 'this\n tag\t has\n spaces') +enc2=$($TEST_DIRECTORY/hex-xcode e "$tag2") + +enc3='%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' +tag3=$($TEST_DIRECTORY/hex-xcode d $enc3) + +notmuch dump --format=notmuch > BACKUP + +notmuch tag +"$tag1" +"$tag2" +"$tag3" -inbox -unread "*" + +test_begin_subtest 'format=notmuch, round trip with strange tags' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch dump --format=notmuch | notmuch restore --format=notmuch + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + + +test_begin_subtest 'format=notmuch, checking encoded output' + cp /dev/null EXPECTED.$test_count + notmuch dump --format=notmuch -- from:cworth |\ + awk "{ print \$1 \" $enc1 $enc2 $enc3\" }" > EXPECTED.$test_count + + notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +notmuch restore --format=notmuch < BACKUP + test_done -- 1.7.7.3 
_______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
The simple heuristic relies on the fact that '(' is not part of the character set used by hex-escape. Since hex-escape is designed to be OK for pathnames (and shells), this seems like a reasonable assumption. In principle the --format argument to notmuch-restore is notmuch needed at this point, but it adds literally 5 lines of argument description, so I left it. --- dump-restore-private.h | 5 +++-- notmuch-restore.c | 45 ++++++++++++++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/dump-restore-private.h b/dump-restore-private.h index 34a5022..67795e5 100644 --- a/dump-restore-private.h +++ b/dump-restore-private.h @@ -5,8 +5,9 @@ #include "command-line-arguments.h" typedef enum dump_formats { - DUMP_FORMAT_SUP, - DUMP_FORMAT_NOTMUCH + DUMP_FORMAT_AUTO, + DUMP_FORMAT_NOTMUCH, + DUMP_FORMAT_SUP } dump_format_t; #endif diff --git a/notmuch-restore.c b/notmuch-restore.c index f618b47..340f31f 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -36,7 +36,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) regex_t regex; int rerr; int opt_index; - int input_format = DUMP_FORMAT_SUP; + int input_format = DUMP_FORMAT_AUTO; config = notmuch_config_open (ctx, NULL, NULL); if (config == NULL) @@ -50,7 +50,11 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config); notmuch_opt_desc_t options[] = { - FORMAT_DESC (input_format), + { NOTMUCH_OPT_KEYWORD, &input_format, "format", 'f', + (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO }, + { "notmuch", DUMP_FORMAT_NOTMUCH }, + { "sup", DUMP_FORMAT_SUP }, + {0, 0} } }, { NOTMUCH_OPT_POSITION, &input_file_name, 0, 0, 0 }, { NOTMUCH_OPT_BOOLEAN, &accumulate, "accumulate", 'a', 0 }, { 0, 0, 0, 0, 0 } @@ -80,17 +84,6 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) return 1; } - /* Dump output is one line per message. 
We match a sequence of - * non-space characters for the message-id, then one or more - * spaces, then a list of space-separated tags as a sequence of - * characters within literal '(' and ')'. */ - if (input_format == DUMP_FORMAT_SUP) { - if ( xregcomp (&regex, - "^([^ ]+) \\(([^)]*)\\)$", - REG_EXTENDED) ) - INTERNAL_ERROR("compile time constant regex failed."); - } - /* These are out here to re-use the buffers with hex_decode */ @@ -98,6 +91,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) size_t message_id_size = 0; char *tag = NULL; size_t tag_size = 0; + notmuch_bool_t first_line = TRUE; while ((line_len = getline (&line, &line_size, input)) != -1) { regmatch_t match[3]; @@ -109,6 +103,30 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) char *db_tags_str; chomp_newline (line); + if (first_line && input_format == DUMP_FORMAT_AUTO) { + char *p; + + for (p = line; *p; p++) { + if (*p == '(') + input_format = DUMP_FORMAT_SUP; + } + + if (input_format == DUMP_FORMAT_AUTO) + input_format = DUMP_FORMAT_NOTMUCH; + + } + + /* sup dump output is one line per message. We match a + * sequence of non-space characters for the message-id, then + * one or more spaces, then a list of space-separated tags as + * a sequence of characters within literal '(' and ')'. */ + if (first_line && input_format == DUMP_FORMAT_SUP) { + if ( xregcomp (&regex, + "^([^ ]+) \\(([^)]*)\\)$", + REG_EXTENDED) ) + INTERNAL_ERROR("compile time constant regex failed."); + } + /* Silently ignore blank lines */ @@ -141,6 +159,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) file_tags = xstrdup (p); } + first_line = FALSE; status = notmuch_database_find_message (notmuch, message_id, &message); if (status || message == NULL) { -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
We start by restoring the original tags, and making sure that restore worked. Next we test each possible input to autodetection, both explicit (with --format=auto) and implicit (without --format). --- test/dump-restore | 34 ++++++++++++++++++++++++++++++++-- 1 files changed, 32 insertions(+), 2 deletions(-) diff --git a/test/dump-restore b/test/dump-restore index ef4a63e..8400c7b 100755 --- a/test/dump-restore +++ b/test/dump-restore @@ -119,7 +119,6 @@ test_begin_subtest 'format=notmuch, round trip with strange tags' notmuch dump --format=notmuch > OUTPUT.$test_count test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count - test_begin_subtest 'format=notmuch, checking encoded output' cp /dev/null EXPECTED.$test_count notmuch dump --format=notmuch -- from:cworth |\ @@ -128,6 +127,37 @@ test_begin_subtest 'format=notmuch, checking encoded output' notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count -notmuch restore --format=notmuch < BACKUP +test_begin_subtest 'restoring sane tags' + notmuch restore --format=notmuch < BACKUP + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file BACKUP OUTPUT.$test_count + +test_begin_subtest 'format=notmuch, restore=auto' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore --format=auto < EXPECTED.$test_count + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=sup, restore=auto' + notmuch dump --format=sup > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore --format=auto < EXPECTED.$test_count + notmuch dump --format=sup > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=notmuch, restore=default' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch 
restore < EXPECTED.$test_count + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=sup, restore=default' + notmuch dump --format=sup > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore < EXPECTED.$test_count + notmuch dump --format=sup > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count test_done -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
Jameson Graef Rollins |
|
|
In reply to this post by David Bremner-2
On Sun, 18 Dec 2011 09:15:02 -0400, David Bremner <[hidden email]> wrote:
> I left the default dump format as "sup" (old style) in this round. I'm > leery of breaking people's scripts, so my current proposal is to > deprecate the sup format (as default) for a release or two. It seems to me that as long as restore can detect what format the dump file is in, it shouldn't matter what the default dump format is, right? If someone makes a new dump in the new format, they'll presumably also be restoring with the new code that supports that format as well. I'm having a hard time imagining a situation where moving to a new fully-supported default dump format would break anything. jamie. _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
On Sun, 18 Dec 2011 12:51:56 -0800, Jameson Graef Rollins <[hidden email]> wrote:
> It seems to me that as long as restore can detect what format the dump > file is in, it shouldn't matter what the default dump format is, right? > If someone makes a new dump in the new format, they'll presumably also > be restoring with the new code that supports that format as well. I'm > having a hard time imagining a situation where moving to a new > fully-supported default dump format would break anything. Well, nmbug would break, to take a random example ;). I expect other people parse the dump output for their own nefarious purposes. d _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
This version of this series adds fairly extensive testing with strange
message ids full of spaces and punctuation, and some documentation. _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
From: David Bremner <[hidden email]>
The character set is chosen to be suitable for pathnames, and the same as that used by contrib/nmbug --- util/Makefile.local | 2 +- util/hex-escape.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++++++ util/hex-escape.h | 32 +++++++++++ 3 files changed, 189 insertions(+), 1 deletions(-) create mode 100644 util/hex-escape.c create mode 100644 util/hex-escape.h diff --git a/util/Makefile.local b/util/Makefile.local index 26e4c3f..2e63932 100644 --- a/util/Makefile.local +++ b/util/Makefile.local @@ -3,7 +3,7 @@ dir := util extra_cflags += -I$(srcdir)/$(dir) -libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c +libutil_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c libutil_modules := $(libutil_c_srcs:.c=.o) diff --git a/util/hex-escape.c b/util/hex-escape.c new file mode 100644 index 0000000..6c1260b --- /dev/null +++ b/util/hex-escape.c @@ -0,0 +1,156 @@ +/* hex-escape.c - Manage encoding and decoding of byte strings into path names + * + * Copyright (c) 2011 David Bremner + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/ . 
+ * + * Author: David Bremner <[hidden email]> + */ + +#include <assert.h> +#include <string.h> +#include <talloc.h> +#include "error_util.h" +#include "hex-escape.h" + +static const size_t default_buf_size = 1024; + +static const char *output_charset = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+-_@=.:,"; + +static const int escape_char = '%'; + +static int +is_output (char c) +{ + return (strchr (output_charset, c) != NULL); +} + +static int +maybe_realloc (void *ctx, size_t needed, char **out, size_t *out_size) +{ + if (*out_size < needed) { + + if (*out == NULL) + *out = talloc_size (ctx, needed); + else + *out = talloc_realloc (ctx, *out, char, needed); + + if (*out == NULL) + return 0; + + *out_size = needed; + } + return 1; +} + +hex_status_t +hex_encode (void *ctx, const char *in, char **out, size_t *out_size) +{ + + const unsigned char *p; + char *q; + + size_t escape_count = 0; + size_t len = 0; + size_t needed; + + assert (ctx); assert (in); assert (out); assert (out_size); + + for (p = (unsigned char *) in; *p; p++) { + escape_count += (!is_output (*p)); + len++; + } + + needed = len + escape_count * 2 + 1; + + if (*out == NULL) + *out_size = 0; + + if (!maybe_realloc (ctx, needed, out, out_size)) + return HEX_OUT_OF_MEMORY; + + q = *out; + p = (unsigned char *) in; + + while (*p) { + if (is_output (*p)) { + *q++ = *p++; + } else { + sprintf (q, "%%%02x", *p++); + q += 3; + } + } + + *q = '\0'; + return HEX_SUCCESS; +} + +hex_status_t +hex_decode (void *ctx, const char *in, char **out, size_t * out_size) +{ + + char buf[3]; + + const char *p; + unsigned char *q; + + size_t escape_count = 0; + size_t needed = 0; + + assert (ctx); assert (in); assert (out); assert (out_size); + + size_t len = strlen (in); + + for (p = in; *p; p++) + escape_count += (*p == escape_char); + + needed = len - escape_count * 2 + 1; + + if (!maybe_realloc (ctx, needed, out, out_size)) + return HEX_OUT_OF_MEMORY; + + p = in; + q = (unsigned char *) *out; + 
buf[2] = 0; + + while (*p) { + + if (*p == escape_char) { + + char *endp; + + if (len < 3) + return HEX_SYNTAX_ERROR; + + buf[0] = p[1]; + buf[1] = p[2]; + + *q = strtol (buf, &endp, 16); + + if (endp != buf + 2) + return HEX_SYNTAX_ERROR; + + len -= 3; + p += 3; + q++; + } else { + *q++ = *p++; + } + } + + *q = '\0'; + + return HEX_SUCCESS; +} diff --git a/util/hex-escape.h b/util/hex-escape.h new file mode 100644 index 0000000..e409626 --- /dev/null +++ b/util/hex-escape.h @@ -0,0 +1,32 @@ +#ifndef _HEX_ESCAPE_H +#define _HEX_ESCAPE_H + +typedef enum hex_status { + HEX_SUCCESS = 0, + HEX_SYNTAX_ERROR, + HEX_OUT_OF_MEMORY +} hex_status_t; + +/* + * The API is modelled on that for getline. + * + * If 'out' points to a NULL pointer a char array of the appropriate + * size is allocated using talloc, and out_size is updated. + * + * If 'out' points to a non-NULL pointer, it assumed to describe an + * existing char array, with the size given in *out_size. This array + * may be resized by talloc_realloc if needed; in this case *out_size + * will also be updated. + * + * Note that it is an error to pass a NULL pointer for any parameter + * of these routines. + */ + +hex_status_t +hex_encode (void *talloc_ctx, const char *in, char **out, + size_t *out_size); + +hex_status_t +hex_decode (void *talloc_ctx, const char *in, char **out, + size_t *out_size); +#endif -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
This program is used both as a test-bed/unit-tester for ../util/hex-escape.c, and also as a utility in future tests of dump and restore. --- test/.gitignore | 1 + test/Makefile.local | 6 ++- test/basic | 2 +- test/hex-xcode.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 2 deletions(-) create mode 100644 test/hex-xcode.c diff --git a/test/.gitignore b/test/.gitignore index e63c689..be7ab5e 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -3,4 +3,5 @@ corpus.mail smtp-dummy symbol-test arg-test +hex-xcode tmp.* diff --git a/test/Makefile.local b/test/Makefile.local index fa2df73..ba697f4 100644 --- a/test/Makefile.local +++ b/test/Makefile.local @@ -13,6 +13,9 @@ smtp_dummy_modules = $(smtp_dummy_srcs:.c=.o) $(dir)/arg-test: $(dir)/arg-test.o command-line-arguments.o util/libutil.a $(call quiet,CC) -I. $^ -o $@ +$(dir)/hex-xcode: $(dir)/hex-xcode.o command-line-arguments.o util/libutil.a + $(call quiet,CC) -I. $^ -o $@ -ltalloc + $(dir)/smtp-dummy: $(smtp_dummy_modules) $(call quiet,CC) $^ -o $@ @@ -21,7 +24,8 @@ $(dir)/symbol-test: $(dir)/symbol-test.o .PHONY: test check -test-binaries: $(dir)/arg-test $(dir)/smtp-dummy $(dir)/symbol-test +test-binaries: $(dir)/arg-test $(dir)/hex-xcode \ + $(dir)/smtp-dummy $(dir)/symbol-test test: all test-binaries @${dir}/notmuch-test $(OPTIONS) diff --git a/test/basic b/test/basic index d6aed24..af57026 100755 --- a/test/basic +++ b/test/basic @@ -54,7 +54,7 @@ test_begin_subtest 'Ensure that all available tests will be run by notmuch-test' eval $(sed -n -e '/^TESTS="$/,/^"$/p' $TEST_DIRECTORY/notmuch-test) tests_in_suite=$(for i in $TESTS; do echo $i; done | sort) available=$(find "$TEST_DIRECTORY" -maxdepth 1 -type f -executable -printf '%f\n' | \ - sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test)$/d" | \ + sed -r -e "/^(aggregate-results.sh|notmuch-test|smtp-dummy|test-verbose|symbol-test|arg-test|hex-xcode)$/d" | \ sort) 
test_expect_equal "$tests_in_suite" "$available" diff --git a/test/hex-xcode.c b/test/hex-xcode.c new file mode 100644 index 0000000..eec6541 --- /dev/null +++ b/test/hex-xcode.c @@ -0,0 +1,103 @@ +/* No, nothing to to with IDE from Apple Inc. + testbed for ../util/hex-escape.c. + + usage: + hex-xcode [--direction=(encode|decode)] [--omit-newline] < file + hex-xcode [--direction=(encode|decode)] [--omit-newline] arg1 arg2 arg3 ... + + */ + +#include "notmuch-client.h" +#include "hex-escape.h" +#include <assert.h> + + +enum direction { + ENCODE, + DECODE +}; + +static int +xcode (void *ctx, enum direction dir, char *in, char **buf_p, size_t *size_p) +{ + hex_status_t status; + + if (dir == ENCODE) + status = hex_encode (ctx, in, buf_p, size_p); + else + status = hex_decode (ctx, in, buf_p, size_p); + + if (status == HEX_SUCCESS) + fputs (*buf_p, stdout); + + return status; +} + + +int +main (int argc, char **argv) +{ + + + enum direction dir = DECODE; + int omit_newline = FALSE; + + notmuch_opt_desc_t options[] = { + { NOTMUCH_OPT_KEYWORD, &dir, "direction", 'd', + (notmuch_keyword_t []){ { "encode", ENCODE }, + { "decode", DECODE }, + { 0, 0 } } }, + { NOTMUCH_OPT_BOOLEAN, &omit_newline, "omit-newline", 'n', 0 }, + { 0, 0, 0, 0, 0 } + }; + + int opt_index = parse_arguments (argc, argv, options, 1); + + if (opt_index < 0) + exit (1); + + void *ctx = talloc_new (NULL); + + char *line = NULL; + size_t line_size; + ssize_t line_len; + + char *buffer = NULL; + size_t buf_size = 0; + + notmuch_bool_t read_stdin = TRUE; + + for (; opt_index < argc; opt_index++) { + + if (xcode (ctx, dir, argv[opt_index], + &buffer, &buf_size) != HEX_SUCCESS) + return 1; + + if (!omit_newline) + putchar ('\n'); + + read_stdin = FALSE; + } + + if (!read_stdin) + return 0; + + while ((line_len = getline (&line, &line_size, stdin)) != -1) { + + chomp_newline (line); + + if (xcode (ctx, dir, line, &buffer, &buf_size) != HEX_SUCCESS) + return 1; + + if (!omit_newline) + putchar ('\n'); + + } + 
+ if (line) + free (line); + + talloc_free (ctx); + + return 0; +} -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
These are more like unit tests, to (try to) make sure the library functionality is working before building more complicated things on top of it. --- test/hex-escaping | 26 ++++++++++++++++++++++++++ test/notmuch-test | 1 + 2 files changed, 27 insertions(+), 0 deletions(-) create mode 100755 test/hex-escaping diff --git a/test/hex-escaping b/test/hex-escaping new file mode 100755 index 0000000..f34cc8c --- /dev/null +++ b/test/hex-escaping @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +test_description="hex encoding and decoding" +. ./test-lib.sh + +test_begin_subtest "round trip" +find $TEST_DIRECTORY/corpus -type f -print | sort | xargs cat > EXPECTED +$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED | $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT +test_expect_equal_file OUTPUT EXPECTED + +test_begin_subtest "punctuation" +tag1='comic_swear=$&^%$^%\\//-+$^%$' +tag_enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1") +test_expect_equal "$tag_enc1" "comic_swear=%24%26%5e%25%24%5e%25%5c%5c%2f%2f-+%24%5e%25%24" + +test_begin_subtest "round trip newlines" +printf 'this\n tag\t has\n spaces\n' > EXPECTED.$test_count +$TEST_DIRECTORY/hex-xcode --direction=encode < EXPECTED.$test_count |\ + $TEST_DIRECTORY/hex-xcode --direction=decode > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest "round trip 8bit chars" +echo '%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' > EXPECTED.$test_count +$TEST_DIRECTORY/hex-xcode --direction=decode < EXPECTED.$test_count |\ + $TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count +test_done diff --git a/test/notmuch-test b/test/notmuch-test index 6a99ae3..cbe2ef9 100755 --- a/test/notmuch-test +++ b/test/notmuch-test @@ -52,6 +52,7 @@ TESTS=" python hooks argument-parsing + hex-escaping " TESTS=${NOTMUCH_TESTS:=$TESTS} -- 1.7.7.3 _______________________________________________ notmuch mailing 
list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
sup is the old format, and remains the default. Each line of the notmuch format is "msg_id tag tag...tag" where each space-separated token is 'hex-encoded' to remove troubling characters. In particular this format won't have the same problem with e.g. spaces in message-ids or tags; they will be round-trip-able. --- dump-restore-private.h | 12 ++++++++++++ notmuch-dump.c | 47 +++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 8 deletions(-) create mode 100644 dump-restore-private.h diff --git a/dump-restore-private.h b/dump-restore-private.h new file mode 100644 index 0000000..34a5022 --- /dev/null +++ b/dump-restore-private.h @@ -0,0 +1,12 @@ +#ifndef DUMP_RESTORE_PRIVATE_H +#define DUMP_RESTORE_PRIVATE_H + +#include "hex-escape.h" +#include "command-line-arguments.h" + +typedef enum dump_formats { + DUMP_FORMAT_SUP, + DUMP_FORMAT_NOTMUCH +} dump_format_t; + +#endif diff --git a/notmuch-dump.c b/notmuch-dump.c index a735875..0231db2 100644 --- a/notmuch-dump.c +++ b/notmuch-dump.c @@ -19,6 +19,7 @@ */ #include "notmuch-client.h" +#include "dump-restore-private.h" int notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) @@ -44,9 +45,15 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) char *output_file_name = NULL; int opt_index; + int output_format = DUMP_FORMAT_SUP; + notmuch_opt_desc_t options[] = { - { NOTMUCH_OPT_POSITION, &output_file_name, 0, 0, 0 }, - { 0, 0, 0, 0, 0 } + { NOTMUCH_OPT_KEYWORD, &output_format, "format", 'f', + (notmuch_keyword_t []){ { "sup", DUMP_FORMAT_SUP }, + { "notmuch", DUMP_FORMAT_NOTMUCH }, + {0, 0} } }, + { NOTMUCH_OPT_POSITION, &output_file_name, 0, 0, 0 }, + { 0, 0, 0, 0, 0 } }; opt_index = parse_arguments (argc, argv, options, 1); @@ -85,29 +92,53 @@ notmuch_dump_command (unused (void *ctx), int argc, char *argv[]) */ notmuch_query_set_sort (query, NOTMUCH_SORT_UNSORTED); + char *buffer = NULL; + size_t buffer_size = 0; + for (messages = notmuch_query_search_messages 
(query); notmuch_messages_valid (messages); notmuch_messages_move_to_next (messages)) { int first = 1; - message = notmuch_messages_get (messages); + const char *message_id; - fprintf (output, - "%s (", notmuch_message_get_message_id (message)); + message = notmuch_messages_get (messages); + message_id = notmuch_message_get_message_id (message); + + if (output_format == DUMP_FORMAT_SUP) { + fprintf (output, "%s (", message_id); + } else { + if (hex_encode (notmuch, message_id, + &buffer, &buffer_size) != HEX_SUCCESS) + return 1; + fprintf (output, "%s ", buffer); + } for (tags = notmuch_message_get_tags (message); notmuch_tags_valid (tags); notmuch_tags_move_to_next (tags)) { + const char *tag_str = notmuch_tags_get (tags); + if (! first) - fprintf (output, " "); + fputs (" ", output); - fprintf (output, "%s", notmuch_tags_get (tags)); + if (output_format == DUMP_FORMAT_SUP) { + fputs (tag_str, output); + } else { + if (hex_encode (notmuch, tag_str, + &buffer, &buffer_size) != HEX_SUCCESS) + return 1; + fputs (buffer, output); + } first = 0; } - fprintf (output, ")\n"); + if (output_format == DUMP_FORMAT_SUP) + fputs (")\n", output); + else + fputs ("\n", output); notmuch_message_destroy (message); } -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
The first test is really to test our assumptions about the corpus, namely that a certain set of message-id's is safe (i.e. doesn't change under hex-escaping). We then check dump output as best we can without functionality-to-come in notmuch-restore. --- test/dump-restore | 12 ++++++++++++ 1 files changed, 12 insertions(+), 0 deletions(-) diff --git a/test/dump-restore b/test/dump-restore index 439e998..0e210d3 100755 --- a/test/dump-restore +++ b/test/dump-restore @@ -82,4 +82,16 @@ test_begin_subtest "dump outfile -- from:cworth" notmuch dump dump-outfile-dash-inbox.actual -- from:cworth test_expect_equal_file dump-cworth.expected dump-outfile-dash-inbox.actual +test_begin_subtest "Check for a safe set of message-ids" +notmuch search --output=messages from:cworth > EXPECTED.$test_count +notmuch search --output=messages from:cworth |\ + $TEST_DIRECTORY/hex-xcode --direction=encode > OUTPUT.$test_count +test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count + +# we have observed that cworth has sane message-ids, and hopefully sane tags. +test_begin_subtest "dump --format=notmuch -- from:cworth" +notmuch dump --format=sup -- from:cworth | tr -d \(\) > EXPECTED.$test_count +notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count +test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count + test_done -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
This format is whitespace-separated tokens, encoded by util/hex-escape.c. The format detection heuristic relies on the fact that '(' is not part of the character set used by hex-escape. Since hex-escape is designed to be OK for pathnames (and shells), this seems like a reasonable assumption. In principle the --format argument to notmuch-restore is not needed at this point, but it adds literally 5 lines of argument description, so I left it. --- dump-restore-private.h | 5 +- notmuch-restore.c | 111 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 92 insertions(+), 24 deletions(-) diff --git a/dump-restore-private.h b/dump-restore-private.h index 34a5022..67795e5 100644 --- a/dump-restore-private.h +++ b/dump-restore-private.h @@ -5,8 +5,9 @@ #include "command-line-arguments.h" typedef enum dump_formats { - DUMP_FORMAT_SUP, - DUMP_FORMAT_NOTMUCH + DUMP_FORMAT_AUTO, + DUMP_FORMAT_NOTMUCH, + DUMP_FORMAT_SUP } dump_format_t; #endif diff --git a/notmuch-restore.c b/notmuch-restore.c index 87d9772..3fdfecc 100644 --- a/notmuch-restore.c +++ b/notmuch-restore.c @@ -19,6 +19,7 @@ */ #include "notmuch-client.h" +#include "dump-restore-private.h" int notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) @@ -35,6 +36,7 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) regex_t regex; int rerr; int opt_index; + int input_format = DUMP_FORMAT_AUTO; config = notmuch_config_open (ctx, NULL, NULL); if (config == NULL) @@ -48,6 +50,11 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) synchronize_flags = notmuch_config_get_maildir_synchronize_flags (config); notmuch_opt_desc_t options[] = { + { NOTMUCH_OPT_KEYWORD, &input_format, "format", 'f', + (notmuch_keyword_t []){ { "auto", DUMP_FORMAT_AUTO }, + { "notmuch", DUMP_FORMAT_NOTMUCH }, + { "sup", DUMP_FORMAT_SUP }, + {0, 0} } }, { NOTMUCH_OPT_POSITION, &input_file_name, 0, 0, 0 }, { NOTMUCH_OPT_BOOLEAN, &accumulate, "accumulate", 'a', 0 }, { 0, 0, 0, 0, 
0 } @@ -77,37 +84,85 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) return 1; } - /* Dump output is one line per message. We match a sequence of - * non-space characters for the message-id, then one or more - * spaces, then a list of space-separated tags as a sequence of - * characters within literal '(' and ')'. */ - if ( xregcomp (&regex, - "^([^ ]+) \\(([^)]*)\\)$", - REG_EXTENDED) ) - INTERNAL_ERROR("compile time constant regex failed."); + + /* These are out here to re-use the buffers with hex_decode */ + + char *message_id = NULL; + size_t message_id_size = 0; + char *tag = NULL; + size_t tag_size = 0; + notmuch_bool_t first_line = TRUE; while ((line_len = getline (&line, &line_size, input)) != -1) { regmatch_t match[3]; - char *message_id, *file_tags, *tag, *next; + char *file_tags, *next; notmuch_message_t *message = NULL; + notmuch_status_t status; notmuch_tags_t *db_tags; char *db_tags_str; chomp_newline (line); + if (first_line && input_format == DUMP_FORMAT_AUTO) { + char *p; - rerr = xregexec (&regex, line, 3, match, 0); - if (rerr == REG_NOMATCH) - { - fprintf (stderr, "Warning: Ignoring invalid input line: %s\n", - line); + for (p = line; *p; p++) { + if (*p == '(') + input_format = DUMP_FORMAT_SUP; + } + + if (input_format == DUMP_FORMAT_AUTO) + input_format = DUMP_FORMAT_NOTMUCH; + + } + + /* sup dump output is one line per message. We match a + * sequence of non-space characters for the message-id, then + * one or more spaces, then a list of space-separated tags as + * a sequence of characters within literal '(' and ')'. 
*/ + if (first_line && input_format == DUMP_FORMAT_SUP) { + if ( xregcomp (&regex, + "^([^ ]+) \\(([^)]*)\\)$", + REG_EXTENDED) ) + INTERNAL_ERROR("compile time constant regex failed."); + } + + + /* Silently ignore blank lines */ + + if (line[0] == '\0') { continue; } - message_id = xstrndup (line + match[1].rm_so, - match[1].rm_eo - match[1].rm_so); - file_tags = xstrndup (line + match[2].rm_so, - match[2].rm_eo - match[2].rm_so); + if (input_format == DUMP_FORMAT_SUP) { + rerr = xregexec (&regex, line, 3, match, 0); + if (rerr == REG_NOMATCH) + { + fprintf (stderr, "Warning: Ignoring invalid input line: %s\n", + line); + continue; + } + message_id = talloc_strndup (notmuch, line + match[1].rm_so, + match[1].rm_eo - match[1].rm_so); + file_tags = talloc_strndup (notmuch, line + match[2].rm_so, + match[2].rm_eo - match[2].rm_so); + } else { + char *p = line; + char *raw_mid; + + raw_mid = strsep (&p, " \t"); + + if (hex_decode (notmuch, raw_mid, + &message_id, &message_id_size) != HEX_SUCCESS) + return 1; + + if (p) + file_tags = xstrdup (p); + else + file_tags = NULL; + } + + first_line = FALSE; status = notmuch_database_find_message (notmuch, message_id, &message); if (status || message == NULL) { @@ -153,7 +208,16 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) next = file_tags; while (next) { - tag = strsep (&next, " "); + char *raw_tag = strsep (&next, " "); + + if (input_format == DUMP_FORMAT_NOTMUCH) { + if (hex_decode (notmuch, raw_tag, + &tag, &tag_size) != HEX_SUCCESS) + return 1; + } else { + tag = talloc_strdup (notmuch, raw_tag); + } + if (*tag == '\0') continue; status = notmuch_message_add_tag (message, tag); @@ -175,11 +239,14 @@ notmuch_restore_command (unused (void *ctx), int argc, char *argv[]) if (message) notmuch_message_destroy (message); message = NULL; - free (message_id); - free (file_tags); + if (input_format == DUMP_FORMAT_SUP) { + talloc_free (message_id); + talloc_free (file_tags); + } } - regfree (&regex); + if 
(input_format == DUMP_FORMAT_SUP) + regfree (&regex); if (line) free (line); -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
|
David Bremner-2 |
|
|
In reply to this post by David Bremner-2
From: David Bremner <[hidden email]>
These ones need the completed functionality in notmuch-restore. Fairly exotic tags are tested, but no weird message ids. We test each possible input to autodetection, both explicit (with --format=auto) and implicit (without --format). --- test/dump-restore | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 66 insertions(+), 0 deletions(-) diff --git a/test/dump-restore b/test/dump-restore index 0e210d3..60ae68e 100755 --- a/test/dump-restore +++ b/test/dump-restore @@ -94,4 +94,70 @@ notmuch dump --format=sup -- from:cworth | tr -d \(\) > EXPECTED.$test_count notmuch dump --format=notmuch -- from:cworth > OUTPUT.$test_count test_expect_equal_file OUTPUT.$test_count EXPECTED.$test_count +test_begin_subtest "format=notmuch, # round-trip" +notmuch dump --format=sup | sort > EXPECTED.$test_count +notmuch dump --format=notmuch | notmuch restore --format=notmuch +notmuch dump --format=sup | sort > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +tag1='comic_swear=$&^%$^%\\//-+$^%$' +enc1=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag1") + +tag2=$(printf 'this\n tag\t has\n spaces') +enc2=$($TEST_DIRECTORY/hex-xcode --direction=encode "$tag2") + +enc3='%c3%91%c3%a5%c3%b0%c3%a3%c3%a5%c3%a9-%c3%8f%c3%8a' +tag3=$($TEST_DIRECTORY/hex-xcode --direction=decode $enc3) + +notmuch dump --format=notmuch > BACKUP + +notmuch tag +"$tag1" +"$tag2" +"$tag3" -inbox -unread "*" + +test_begin_subtest 'format=notmuch, round trip with strange tags' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch dump --format=notmuch | notmuch restore --format=notmuch + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=notmuch, checking encoded output' + cp /dev/null EXPECTED.$test_count + notmuch dump --format=notmuch -- from:cworth |\ + awk "{ print \$1 \" $enc1 $enc2 $enc3\" }" > EXPECTED.$test_count + + notmuch dump 
--format=notmuch -- from:cworth > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'restoring sane tags' + notmuch restore --format=notmuch < BACKUP + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file BACKUP OUTPUT.$test_count + +test_begin_subtest 'format=notmuch, restore=auto' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore --format=auto < EXPECTED.$test_count + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=sup, restore=auto' + notmuch dump --format=sup > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore --format=auto < EXPECTED.$test_count + notmuch dump --format=sup > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=notmuch, restore=default' + notmuch dump --format=notmuch > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore < EXPECTED.$test_count + notmuch dump --format=notmuch > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + +test_begin_subtest 'format=sup, restore=default' + notmuch dump --format=sup > EXPECTED.$test_count + notmuch tag -inbox -unread "*" + notmuch restore < EXPECTED.$test_count + notmuch dump --format=sup > OUTPUT.$test_count +test_expect_equal_file EXPECTED.$test_count OUTPUT.$test_count + test_done -- 1.7.7.3 _______________________________________________ notmuch mailing list [hidden email] http://notmuchmail.org/mailman/listinfo/notmuch |
| Powered by Nabble | See how NAML generates this page |