Draft of indexing user defined headers

classic Classic list List threaded Threaded
6 messages Options
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Draft of indexing user defined headers

This is an early draft; it definitely needs polishing.  Feedback on the general design is welcome.
Notice that users will need to reindex manually after changing the set of indexed headers.


_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[RFC patch 1/5] cli/config: refactor _stored_in_db

This will make it easier to add other prefixes that are stored in the
database, compared to special casing each one as "query." was.
---
 notmuch-config.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/notmuch-config.c b/notmuch-config.c
index bf77cc9d..1517d0ff 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -821,17 +821,26 @@ _item_split (char *item, char **group, char **key)
 
 #define BUILT_WITH_PREFIX "built_with."
 
+struct config_key {
+    const char *name;
+    bool prefix;
+};
+
 static bool
 _stored_in_db (const char *item)
 {
-    const char * db_configs[] = {
- "index.decrypt",
+    struct config_key db_configs[] = {
+ {"index.decrypt", false},
+ {"query.", true},
     };
-    if (STRNCMP_LITERAL (item, "query.") == 0)
- return true;
-    for (size_t i = 0; i < ARRAY_SIZE (db_configs); i++)
- if (strcmp (item, db_configs[i]) == 0)
+    for (size_t i = 0; i < ARRAY_SIZE (db_configs); i++) {
+ if (db_configs[i].prefix &&
+    strncmp (item, db_configs[i].name,
+     strlen(db_configs[i].name)) == 0)
     return true;
+ if (strcmp (item, db_configs[i].name) == 0)
+    return true;
+    }
     return false;
 }
 
--
2.19.1

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[RFC patch 2/5] cli/config: support user header index config

In reply to this post by David Bremner-2
We don't do anything with this configuration information
yet, but nonetheless add a couple of regression tests to make sure we
don't break standard functionality when we do use the configuration
information.
---
 notmuch-config.c         |  1 +
 test/T720-user-header.sh | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+)
 create mode 100755 test/T720-user-header.sh

diff --git a/notmuch-config.c b/notmuch-config.c
index 1517d0ff..5112fecd 100644
--- a/notmuch-config.c
+++ b/notmuch-config.c
@@ -831,6 +831,7 @@ _stored_in_db (const char *item)
 {
     struct config_key db_configs[] = {
  {"index.decrypt", false},
+ {"index.header.", true},
  {"query.", true},
     };
     for (size_t i = 0; i < ARRAY_SIZE (db_configs); i++) {
diff --git a/test/T720-user-header.sh b/test/T720-user-header.sh
new file mode 100755
index 00000000..774bad2f
--- /dev/null
+++ b/test/T720-user-header.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+test_description='indexing user specified headers'
+. $(dirname "$0")/test-lib.sh || exit 1
+
+test_begin_subtest "error adding user header before initializing DB"
+test_expect_code 1 "notmuch config set index.header.list \"List-Id\""
+
+add_email_corpus
+
+notmuch search '*' | notmuch_search_sanitize > initial-threads
+notmuch search --output=messages '*' > initial-message-ids
+notmuch dump > initial-dump
+
+test_begin_subtest "adding user header"
+test_expect_code 0 "notmuch config set index.header.list \"List-Id\""
+
+test_begin_subtest "adding user header"
+test_expect_code 0 "notmuch config set index.header.list \"List-Id\""
+
+test_begin_subtest "retrieve user header"
+output=$(notmuch config get index.header.list)
+test_expect_equal "List-Id" "$output"
+
+test_begin_subtest 'reindex after adding header preserves threads'
+notmuch reindex '*'
+notmuch search '*' | notmuch_search_sanitize > OUTPUT
+test_expect_equal_file initial-threads OUTPUT
+
+test_begin_subtest "List all user headers"
+notmuch config set index.header.spam "X-Spam"
+notmuch config list | grep ^index.header | notmuch_config_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+index.header.list=List-Id
+index.header.spam=X-Spam
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_done
--
2.19.1

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[RFC patch 3/5] lib: setup user headers in query parser

In reply to this post by David Bremner-2
These tests will need to be updated if the Xapian
query print/debug format changes.
---
 lib/database.cc          | 40 ++++++++++++++++++++++++++++++++++++++++
 test/T720-user-header.sh | 21 +++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/lib/database.cc b/lib/database.cc
index 9cf8062c..fa77eb91 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -299,6 +299,8 @@ prefix_t prefix_table[] = {
  NOTMUCH_FIELD_PROCESSOR},
 };
 
+#define CONFIG_HEADER_PREFIX "index.header."
+
 static void
 _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 {
@@ -308,6 +310,43 @@ _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
  notmuch->query_parser->add_boolean_prefix (prefix->name, prefix->prefix);
 }
 
+static notmuch_status_t
+_setup_user_query_fields (notmuch_database_t *notmuch)
+{
+    notmuch_config_list_t *list;
+    void *local = talloc_new(notmuch);
+    notmuch_status_t status;
+
+    status = notmuch_database_get_config_list (notmuch, "index.header.", &list);
+    if (status)
+ return status;
+    for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
+
+ prefix_t query_field { .name = NULL, .prefix = NULL,
+ .flags = NOTMUCH_FIELD_PROBABILISTIC |
+ NOTMUCH_FIELD_EXTERNAL
+ };
+
+ const char *key = notmuch_config_list_key (list)
+    + sizeof (CONFIG_HEADER_PREFIX) - 1;
+
+ char *prefix = talloc_asprintf(local, "XU:%s", key);
+ for (char *p = prefix + 1; *p; p++)
+    *p = toupper (*p);
+
+ query_field.name = key;
+ query_field.prefix = prefix;
+
+ _setup_query_field_default (&query_field, notmuch);
+    }
+
+    talloc_free (local);
+
+    notmuch_config_list_destroy (list);
+
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 #if HAVE_XAPIAN_FIELD_PROCESSOR
 static void
 _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
@@ -965,6 +1004,7 @@ notmuch_database_open_verbose (const char *path,
  _setup_query_field (prefix, notmuch);
     }
  }
+ status = _setup_user_query_fields (notmuch);
     } catch (const Xapian::Error &error) {
  IGNORE_RESULT (asprintf (&message, "A Xapian exception occurred opening database: %s\n",
  error.get_msg().c_str()));
diff --git a/test/T720-user-header.sh b/test/T720-user-header.sh
index 774bad2f..ab4d4712 100755
--- a/test/T720-user-header.sh
+++ b/test/T720-user-header.sh
@@ -35,4 +35,25 @@ index.header.spam=X-Spam
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "parse user prefix"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'list:"notmuch"' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND XU:LISTnotmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, stemmed"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'list:notmuch' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND ZXU:LISTnotmuch@1))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "parse user prefix, phrase"
+NOTMUCH_DEBUG_QUERY=t notmuch count 'list:notmuchmail.org' 2>&1 | grep Tmail >OUTPUT
+cat <<EOF > EXPECTED
+Query((Tmail AND (XU:LISTnotmuchmail@1 PHRASE 2 XU:LISTorg@2)))
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
--
2.19.1

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[RFC patch 4/5] WIP: store user prefixes

In reply to this post by David Bremner-2
Maybe this should be unified into one prefix table?
---
 lib/database-private.h |  8 ++++++++
 lib/database.cc        | 32 +++++++++++++++++++-------------
 lib/notmuch-private.h  |  4 ++++
 lib/thread.cc          |  2 --
 4 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/lib/database-private.h b/lib/database-private.h
index a499b259..f5e90099 100644
--- a/lib/database-private.h
+++ b/lib/database-private.h
@@ -178,6 +178,12 @@ operator&(notmuch_field_flag_t a, notmuch_field_flag_t b)
     Xapian::QueryParser::FLAG_WILDCARD | \
     Xapian::QueryParser::FLAG_PURE_NOT)
 
+typedef struct {
+    const char *name;
+    const char *prefix;
+    notmuch_field_flag_t flags;
+} prefix_t;
+
 struct _notmuch_database {
     bool exception_reported;
 
@@ -215,6 +221,8 @@ struct _notmuch_database {
     Xapian::ValueRangeProcessor *value_range_processor;
     Xapian::ValueRangeProcessor *date_range_processor;
     Xapian::ValueRangeProcessor *last_mod_range_processor;
+
+    prefix_t *user_prefix;
 };
 
 /* Prior to database version 3, features were implied by the database
diff --git a/lib/database.cc b/lib/database.cc
index fa77eb91..b2654ab7 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -41,12 +41,6 @@ using namespace std;
 
 #define ARRAY_SIZE(arr) (sizeof (arr) / sizeof (arr[0]))
 
-typedef struct {
-    const char *name;
-    const char *prefix;
-    notmuch_field_flag_t flags;
-} prefix_t;
-
 #define NOTMUCH_DATABASE_VERSION 3
 
 #define STRINGIFY(s) _SUB_STRINGIFY(s)
@@ -317,15 +311,24 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
     void *local = talloc_new(notmuch);
     notmuch_status_t status;
 
+    size_t prefix_capacity = 4;
+    size_t prefix_index = 0;
+
+    notmuch->user_prefix = talloc_zero_array (notmuch, prefix_t, prefix_capacity);
+    if (notmuch->user_prefix == NULL)
+ return NOTMUCH_STATUS_OUT_OF_MEMORY;
+    
     status = notmuch_database_get_config_list (notmuch, "index.header.", &list);
     if (status)
  return status;
     for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
 
- prefix_t query_field { .name = NULL, .prefix = NULL,
- .flags = NOTMUCH_FIELD_PROBABILISTIC |
- NOTMUCH_FIELD_EXTERNAL
- };
+ if (prefix_index >= prefix_capacity-1) {
+    prefix_capacity *= 2;
+    notmuch->user_prefix = talloc_realloc (notmuch, notmuch->user_prefix, prefix_t, prefix_capacity);
+ }
+
+ prefix_t *query_field = &(notmuch->user_prefix[prefix_index]);
 
  const char *key = notmuch_config_list_key (list)
     + sizeof (CONFIG_HEADER_PREFIX) - 1;
@@ -334,10 +337,13 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
  for (char *p = prefix + 1; *p; p++)
     *p = toupper (*p);
 
- query_field.name = key;
- query_field.prefix = prefix;
+ query_field->name = key;
+ query_field->prefix = prefix;
+ query_field->flags = NOTMUCH_FIELD_PROBABILISTIC | NOTMUCH_FIELD_EXTERNAL;
+
+ _setup_query_field_default (query_field, notmuch);
 
- _setup_query_field_default (&query_field, notmuch);
+ prefix_index++;
     }
 
     talloc_free (local);
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index df32d39c..09f828ab 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -676,6 +676,10 @@ struct _notmuch_indexopts {
     _notmuch_crypto_t crypto;
 };
 
+#define CONFIG_HEADER_PREFIX "index.header."
+
+#define EMPTY_STRING(s) ((s)[0] == '\0')
+
 NOTMUCH_END_DECLS
 
 #ifdef __cplusplus
diff --git a/lib/thread.cc b/lib/thread.cc
index 47c90664..ae830064 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -30,8 +30,6 @@
 #define THREAD_DEBUG(format, ...) do {} while (0) /* ignored */
 #endif
 
-#define EMPTY_STRING(s) ((s)[0] == '\0')
-
 struct _notmuch_thread {
     notmuch_database_t *notmuch;
     char *thread_id;
--
2.19.1

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[RFC patch 5/5] WIP: index user headers

In reply to this post by David Bremner-2
---
 lib/database.cc          | 37 +++++++++++++++++++++++++++++--------
 lib/index.cc             | 34 ++++++++++++++++++++++++++++++++++
 lib/message.cc           |  5 ++++-
 lib/notmuch-private.h    |  4 ++++
 test/T720-user-header.sh | 17 +++++++++++++++++
 5 files changed, 88 insertions(+), 9 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index b2654ab7..c25d867f 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -293,8 +293,6 @@ prefix_t prefix_table[] = {
  NOTMUCH_FIELD_PROCESSOR},
 };
 
-#define CONFIG_HEADER_PREFIX "index.header."
-
 static void
 _setup_query_field_default (const prefix_t *prefix, notmuch_database_t *notmuch)
 {
@@ -308,7 +306,6 @@ static notmuch_status_t
 _setup_user_query_fields (notmuch_database_t *notmuch)
 {
     notmuch_config_list_t *list;
-    void *local = talloc_new(notmuch);
     notmuch_status_t status;
 
     size_t prefix_capacity = 4;
@@ -318,7 +315,7 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
     if (notmuch->user_prefix == NULL)
  return NOTMUCH_STATUS_OUT_OF_MEMORY;
     
-    status = notmuch_database_get_config_list (notmuch, "index.header.", &list);
+    status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
     if (status)
  return status;
     for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
@@ -333,11 +330,11 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
  const char *key = notmuch_config_list_key (list)
     + sizeof (CONFIG_HEADER_PREFIX) - 1;
 
- char *prefix = talloc_asprintf(local, "XU:%s", key);
+ char *prefix = talloc_asprintf(notmuch, "XU:%s", key);
  for (char *p = prefix + 1; *p; p++)
     *p = toupper (*p);
 
- query_field->name = key;
+ query_field->name = talloc_strdup(notmuch, key);
  query_field->prefix = prefix;
  query_field->flags = NOTMUCH_FIELD_PROBABILISTIC | NOTMUCH_FIELD_EXTERNAL;
 
@@ -346,8 +343,6 @@ _setup_user_query_fields (notmuch_database_t *notmuch)
  prefix_index++;
     }
 
-    talloc_free (local);
-
     notmuch_config_list_destroy (list);
 
     return NOTMUCH_STATUS_SUCCESS;
@@ -399,6 +394,32 @@ _find_prefix (const char *name)
     return "";
 }
 
+/* Map a field name to its Xapian term prefix, checking the built-in
+ * prefix_table first and then the per-database user-defined prefixes.
+ * Returns NULL when the name is unknown, so callers can report a bad
+ * prefix. */
+const char *
+_maybe_user_prefix (notmuch_database_t *notmuch, const char *name)
+{
+    /*XXX TODO: reduce code duplication */
+    for (size_t i = 0; i < ARRAY_SIZE (prefix_table); i++) {
+	if (strcmp (name, prefix_table[i].name) == 0)
+	    return prefix_table[i].prefix;
+    }
+
+    /* user_prefix is walked up to the first entry whose name is NULL. */
+    if (notmuch->user_prefix) {
+	for (size_t i = 0; notmuch->user_prefix[i].name; i++) {
+	    if (strcmp (name, notmuch->user_prefix[i].name) == 0)
+		return notmuch->user_prefix[i].prefix;
+	}
+    }
+
+    return NULL;
+}
+
+
+
 static const struct {
     /* NOTMUCH_FEATURE_* value. */
     _notmuch_features value;
diff --git a/lib/index.cc b/lib/index.cc
index efd9da4c..6324b5a0 100644
--- a/lib/index.cc
+++ b/lib/index.cc
@@ -595,6 +595,38 @@ _index_encrypted_mime_part (notmuch_message_t *message,
 
 }
 
+/* Index the headers named by the "index.header.*" configuration: each key
+ * suffix is a search prefix name, each value the header to index under it.
+ * Returns the config lookup status on failure, otherwise SUCCESS. */
+static notmuch_status_t
+_notmuch_message_index_user_headers (notmuch_message_t *message, GMimeMessage *mime_message)
+{
+    notmuch_database_t *notmuch = notmuch_message_get_database (message);
+    notmuch_config_list_t *list;
+    notmuch_status_t status;
+
+    status = notmuch_database_get_config_list (notmuch, CONFIG_HEADER_PREFIX, &list);
+    if (status)
+	return status;
+
+    for (; notmuch_config_list_valid (list); notmuch_config_list_move_to_next (list)) {
+	const char *prefix_name = notmuch_config_list_key (list)
+	    + sizeof (CONFIG_HEADER_PREFIX) - 1;
+	const char *header_name = notmuch_config_list_value (list);
+
+	/* this is gmime 3.0 specific, I think */
+	const char *header = g_mime_object_get_header (GMIME_OBJECT (mime_message), header_name);
+	/* A missing header must not stop the remaining headers from being indexed. */
+	if (header == NULL)
+	    continue;
+
+	_notmuch_message_gen_terms (message, prefix_name, header);
+    }
+
+    notmuch_config_list_destroy (list);
+    return NOTMUCH_STATUS_SUCCESS;
+}
+
 notmuch_status_t
 _notmuch_message_index_file (notmuch_message_t *message,
      notmuch_indexopts_t *indexopts,
@@ -625,6 +657,8 @@ _notmuch_message_index_file (notmuch_message_t *message,
     subject = g_mime_message_get_subject (mime_message);
     _notmuch_message_gen_terms (message, "subject", subject);
 
+    status = _notmuch_message_index_user_headers (message, mime_message);
+
     _index_mime_part (message, indexopts, g_mime_message_get_mime_part (mime_message));
 
     return NOTMUCH_STATUS_SUCCESS;
diff --git a/lib/message.cc b/lib/message.cc
index 6f2f6345..f428e210 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -1434,7 +1434,10 @@ _notmuch_message_gen_terms (notmuch_message_t *message,
     term_gen->set_document (message->doc);
 
     if (prefix_name) {
- const char *prefix = _find_prefix (prefix_name);
+ const char *prefix = _maybe_user_prefix (message->notmuch, prefix_name);
+
+ if (prefix == NULL)
+    return NOTMUCH_PRIVATE_STATUS_BAD_PREFIX;
 
  term_gen->set_termpos (message->termpos);
  term_gen->index_text (text, 1, prefix);
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 09f828ab..098be4eb 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -136,6 +136,7 @@ typedef enum _notmuch_private_status {
     /* Then add our own private values. */
     NOTMUCH_PRIVATE_STATUS_TERM_TOO_LONG = NOTMUCH_STATUS_LAST_STATUS,
     NOTMUCH_PRIVATE_STATUS_NO_DOCUMENT_FOUND,
+    NOTMUCH_PRIVATE_STATUS_BAD_PREFIX,
 
     NOTMUCH_PRIVATE_STATUS_LAST_STATUS
 } notmuch_private_status_t;
@@ -181,6 +182,9 @@ typedef struct _notmuch_doc_id_set notmuch_doc_id_set_t;
 const char *
 _find_prefix (const char *name);
 
+const char *
+_maybe_user_prefix (notmuch_database_t *notmuch, const char *name);
+
 char *
 _notmuch_message_id_compressed (void *ctx, const char *message_id);
 
diff --git a/test/T720-user-header.sh b/test/T720-user-header.sh
index ab4d4712..c02861cc 100755
--- a/test/T720-user-header.sh
+++ b/test/T720-user-header.sh
@@ -56,4 +56,21 @@ Query((Tmail AND (XU:LISTnotmuchmail@1 PHRASE 2 XU:LISTorg@2)))
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "index user header"
+notmuch config set index.header.list "List-Id"
+notmuch reindex '*'
+notmuch search --output=files list:notmuch | notmuch_search_files_sanitize | sort > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/05:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/cur/20:2,
+MAIL_DIR/bar/new/21:2,
+MAIL_DIR/bar/new/22:2,
+MAIL_DIR/foo/cur/08:2,
+MAIL_DIR/foo/new/03:2,
+MAIL_DIR/new/04:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
--
2.19.1

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch