regexp search for more things

classic Classic list List threaded Threaded
6 messages Options
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

regexp search for more things

This series combines the mid: searching patch left out of 0.24 [1]
with the WIP patch of [2] adding regexp search for tags and paths.  It
is rebased on the regexp fixes of [3], which should hopefully be
merged soon.

[1]: id:[hidden email]
[2]: id:[hidden email]
[3]: id:[hidden email]
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] lib: Add regexp searching for mid: prefix

The bulk of the change is passing in the field options to the regexp
field processor, so that we can properly handle the
fallback (non-regexp case).
---
 doc/man7/notmuch-search-terms.rst | 26 +++++++++++++++-----------
 lib/database.cc                   |  6 ++++--
 lib/regexp-fields.cc              | 22 +++++++++++++++++-----
 lib/regexp-fields.h               |  4 +++-
 test/T650-regexp-query.sh         | 16 ++++++++++++++++
 5 files changed, 55 insertions(+), 19 deletions(-)

diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index 47cab48d..a8ba5e02 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -50,6 +50,10 @@ indicate user-supplied values):
 
 -  id:<message-id>
 
+-  mid:<message-id>
+
+-  mid:/<regex>/
+
 -  thread:<thread-id>
 
 -  folder:<maildir-folder>
@@ -75,15 +79,6 @@ subject of an email. Searching for a phrase in the subject is supported
 by including quotation marks around the phrase, immediately following
 **subject:**.
 
-If notmuch is built with **Xapian Field Processors** (see below) the
-**from:** and **subject** prefix can be also used to restrict the
-results to those whose from/subject value matches a regular expression
-(see **regex(7)**) delimited with //.
-
-::
-
-   notmuch search 'from:/bob@.*[.]example[.]com/'
-
 The **attachment:** prefix can be used to search for specific filenames
 (or extensions) of attachments to email messages.
 
@@ -95,8 +90,8 @@ For **tag:** and **is:** valid tag values include **inbox** and
 **unread** by default for new messages added by **notmuch new** as well
 as any other tag values added manually with **notmuch tag**.
 
-For **id:**, message ID values are the literal contents of the
-Message-ID: header of email messages, but without the '<', '>'
+For **id:** and **mid**, message ID values are the literal contents of
+the Message-ID: header of email messages, but without the '<', '>'
 delimiters.
 
 The **thread:** prefix can be used with the thread ID values that are
@@ -159,6 +154,15 @@ The **property:** prefix searches for messages with a particular
 (and extensions) to add metadata to messages. A given key can be
 present on a given message with several different values.
 
+If notmuch is built with **Xapian Field Processors** (see below) the
+**from:**, **mid:** (but not **id:**), and **subject** prefix can be
+also used to restrict the results to those whose from/subject value
+matches a regular expression (see **regex(7)**) delimited with //.
+
+::
+
+   notmuch search 'from:/bob@.*[.]example[.]com/'
+
 Operators
 ---------
 
diff --git a/lib/database.cc b/lib/database.cc
index 5bc131a3..49b3849c 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -262,7 +262,8 @@ prefix_t prefix_table[] = {
     { "tag", "K", NOTMUCH_FIELD_EXTERNAL },
     { "is", "K", NOTMUCH_FIELD_EXTERNAL },
     { "id", "Q", NOTMUCH_FIELD_EXTERNAL },
-    { "mid", "Q", NOTMUCH_FIELD_EXTERNAL },
+    { "mid", "Q", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
     { "path", "P", NOTMUCH_FIELD_EXTERNAL },
     { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL },
     /*
@@ -313,7 +314,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
  else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
     fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
  else
-    fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release ();
+    fp = (new RegexpFieldProcessor (prefix->name, prefix->flags,
+    *notmuch->query_parser, notmuch))->release ();
 
  /* we treat all field-processor fields as boolean in order to get the raw input */
  notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 08c6ccb5..fb1e951f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -135,21 +135,33 @@ static inline Xapian::valueno _find_slot (std::string prefix)
  return NOTMUCH_VALUE_FROM;
     else if (prefix == "subject")
  return NOTMUCH_VALUE_SUBJECT;
+    else if (prefix == "mid")
+ return NOTMUCH_VALUE_MESSAGE_ID;
     else
  throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
- : slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())),
-  parser (parser_), notmuch (notmuch_)
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+    notmuch_field_flag_t options_,
+    Xapian::QueryParser &parser_,
+    notmuch_database_t *notmuch_)
+ : slot (_find_slot (prefix)),
+  term_prefix (_find_prefix (prefix.c_str ())),
+  options (options_),
+  parser (parser_),
+  notmuch (notmuch_)
 {
 };
 
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-    if (str.size () == 0)
- return Xapian::Query::MatchAll;
+    if (str.size () == 0) {
+ if (options & NOTMUCH_FIELD_PROBABILISTIC)
+    return Xapian::Query::MatchAll;
+ else
+    return Xapian::Query(term_prefix);
+    }
 
     if (str.at (0) == '/') {
  if (str.at (str.size () - 1) == '/'){
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a4ba7ad8..d5f93445 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -65,11 +65,13 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor {
  protected:
     Xapian::valueno slot;
     std::string term_prefix;
+    notmuch_field_flag_t options;
     Xapian::QueryParser &parser;
     notmuch_database_t *notmuch;
 
  public:
-    RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
+    RegexpFieldProcessor (std::string prefix, notmuch_field_flag_t options,
+  Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
 
     ~RegexpFieldProcessor () { };
 
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 05c244c9..5bd24086 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -94,4 +94,20 @@ Query string was: from:/unbalanced[/
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "empty mid search"
+notmuch search --output=messages mid:yoom > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty mid regex search"
+notmuch search --output=messages mid:/yoom/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "combine regexp mid and subject"
+notmuch search  subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
--
2.11.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] lib: Add regexp expansion for tags and paths

In reply to this post by David Bremner-2
From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a
term-based query equivalent to the passed regular expression.
---
 doc/man7/notmuch-search-terms.rst |   9 ++--
 lib/database.cc                   |  12 +++--
 lib/regexp-fields.cc              |  42 +++++++++++----
 test/T650-regexp-query.sh         | 107 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 152 insertions(+), 18 deletions(-)

diff --git a/doc/man7/notmuch-search-terms.rst b/doc/man7/notmuch-search-terms.rst
index a8ba5e02..ab03a2e9 100644
--- a/doc/man7/notmuch-search-terms.rst
+++ b/doc/man7/notmuch-search-terms.rst
@@ -48,6 +48,8 @@ indicate user-supplied values):
 
 -  tag:<tag> (or is:<tag>)
 
+-  tag:<regex> (or is:<regex>)
+
 -  id:<message-id>
 
 -  mid:<message-id>
@@ -155,9 +157,10 @@ The **property:** prefix searches for messages with a particular
 present on a given message with several different values.
 
 If notmuch is built with **Xapian Field Processors** (see below) the
-**from:**, **mid:** (but not **id:**), and **subject** prefix can be
-also used to restrict the results to those whose from/subject value
-matches a regular expression (see **regex(7)**) delimited with //.
+**folder:**, **from:**, **is**, **mid:** (but not **id:**), **path:**,
+**subject:**, and **tag:** prefix can be also used to restrict the
+results to those whose from/subject value matches a regular expression
+(see **regex(7)**) delimited with //.
 
 ::
 
diff --git a/lib/database.cc b/lib/database.cc
index 49b3849c..5b13f541 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
     { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
     { "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
     { "thread", "G", NOTMUCH_FIELD_EXTERNAL },
-    { "tag", "K", NOTMUCH_FIELD_EXTERNAL },
-    { "is", "K", NOTMUCH_FIELD_EXTERNAL },
+    { "tag", "K", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
+    { "is", "K", NOTMUCH_FIELD_EXTERNAL |
+        NOTMUCH_FIELD_PROCESSOR },
     { "id", "Q", NOTMUCH_FIELD_EXTERNAL },
     { "mid", "Q", NOTMUCH_FIELD_EXTERNAL |
  NOTMUCH_FIELD_PROCESSOR },
-    { "path", "P", NOTMUCH_FIELD_EXTERNAL },
+    { "path", "P", NOTMUCH_FIELD_EXTERNAL|
+ NOTMUCH_FIELD_PROCESSOR },
     { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL },
     /*
      * Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
      * letters. See Xapian document termprefixes.html for related
      * discussion.
      */
-    { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL },
+    { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
 #if HAVE_XAPIAN_FIELD_PROCESSOR
     { "date", NULL, NOTMUCH_FIELD_EXTERNAL |
  NOTMUCH_FIELD_PROCESSOR },
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index fb1e951f..e8fa1658 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix)
     else if (prefix == "mid")
  return NOTMUCH_VALUE_MESSAGE_ID;
     else
- throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+ return Xapian::BAD_VALUENO;
 }
 
 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -163,24 +163,44 @@ RegexpFieldProcessor::operator() (const std::string & str)
     return Xapian::Query(term_prefix);
     }
 
-    if (str.at (0) == '/') {
- if (str.at (str.size () - 1) == '/'){
-    RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
-    return Xapian::Query (postings->release ());
+    if (str.length() > 0 && str.at (0) == '/') {
+ if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+    std::string regexp_str = str.substr(1,str.size () - 2);
+    if (slot != Xapian::BAD_VALUENO) {
+ RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+ return Xapian::Query (postings->release ());
+    } else {
+ std::vector<std::string> terms;
+ regex_t regexp;
+
+ compile_regex(regexp, regexp_str.c_str ());
+ for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+     it != notmuch->xapian_db->allterms_end (); ++it) {
+    if (regexec (&regexp, (*it).c_str (), 0, NULL, 0) == 0)
+ terms.push_back(*it);
+ }
+ return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end());
+    }
  } else {
     throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
  }
     } else {
- /* TODO replace this with a nicer API level triggering of
- * phrase parsing, when possible */
- std::string query_str;
+ if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+  /* TODO replace this with a nicer API level triggering of
+   * phrase parsing, when possible */
+  std::string query_str;
 
- if (str.find (' ') != std::string::npos)
+  if (str.find (' ') != std::string::npos)
     query_str = '"' + str + '"';
- else
+  else
     query_str = str;
 
- return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+  return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+ } else {
+    /* Boolean prefix */
+    std::string term = term_prefix + str;
+    return Xapian::Query (term);
+ }
     }
 }
 #endif
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 5bd24086..e526ed76 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -110,4 +110,111 @@ thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older ver
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "empty folder: search"
+notmuch search --output=files folder:baz > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty folder:// search"
+notmuch search --output=files folder:/^baz/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty folder:// search"
+notmuch search --output=files folder:/baz/ | notmuch_search_files_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/bar/baz/cur/26:2,
+MAIL_DIR/bar/baz/cur/25:2,
+MAIL_DIR/bar/baz/24:2,
+MAIL_DIR/bar/baz/23:2,
+MAIL_DIR/foo/baz/new/16:2,
+MAIL_DIR/foo/baz/new/15:2,
+MAIL_DIR/foo/baz/cur/14:2,
+MAIL_DIR/foo/baz/cur/13:2,
+MAIL_DIR/foo/baz/12:2,
+MAIL_DIR/foo/baz/11:2,
+MAIL_DIR/bar/baz/05:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty folder:// search combined to be empty"
+notmuch search folder:/baz/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty path: search"
+notmuch search --output=files path:baz > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty path:// search"
+notmuch search --output=files path:/^baz/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty path:// search"
+notmuch search --output=files path:/baz\/new/ | notmuch_search_files_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+MAIL_DIR/bar/baz/new/28:2,
+MAIL_DIR/bar/baz/new/27:2,
+MAIL_DIR/foo/baz/new/16:2,
+MAIL_DIR/foo/baz/new/15:2,
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty path:// search combined to be empty"
+notmuch search path:/baz/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty tag: search"
+notmuch search --output=files tag:ment > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty tag:// search"
+notmuch search --output=files tag:/^ment/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty tag:// search"
+notmuch search tag:/ment/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty tag:// search combined to be empty"
+notmuch search tag:/ment/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty is: search"
+notmuch search --output=files is:ment > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "empty is:// search"
+notmuch search --output=files is:/^ment/ > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty is:// search"
+notmuch search is:/ment/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [2/2] Lars Kellogg-Stedman; [notmuch] "notmuch help" outputs to stderr? (attachment inbox signed unread)
+thread:XXX   2009-11-18 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] [PATCH] Error out if no query is supplied to search instead of going into an infinite loop (attachment inbox unread)
+thread:XXX   2009-11-17 [1/2] Alex Botero-Lowry| Carl Worth; [notmuch] preliminary FreeBSD support (attachment inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty is:// search combined to be empty"
+notmuch search is:/ment/ and to:archlinux | notmuch_search_files_sanitize > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
--
2.11.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

v2 regexp search for mid/folder/path

In reply to this post by David Bremner-2
This is mainly a rebase against current master, but it also required an
update to the (just pushed) empty query string handling.

Apparently there are at least 4 ways to test a C++ string for being
empty, so here I try a different way of writing it.

Interdiff follows:

diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 26b22fe2..1598c17f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -156,12 +156,17 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-    if (str.size () == 0)
- return Xapian::Query(Xapian::Query::OP_AND_NOT,
+    if (str.empty ()) {
+ if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+    return Xapian::Query(Xapian::Query::OP_AND_NOT,
      Xapian::Query::MatchAll,
      Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
+ } else {
+    return Xapian::Query (term_prefix);
+ }
+    }
 
-    if (str.length() > 0 && str.at (0) == '/') {
+    if (str.at (0) == '/') {
  if (str.length() > 1 && str.at (str.size () - 1) == '/'){
     std::string regexp_str = str.substr(1,str.size () - 2);
     if (slot != Xapian::BAD_VALUENO) {

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] lib: Add regexp searching for mid: prefix

The bulk of the change is passing in the field options to the regexp
field processor, so that we can properly handle the
fallback (non-regexp case).
---
 lib/database.cc           |  6 ++++--
 lib/regexp-fields.cc      | 36 +++++++++++++++++++++++++-----------
 lib/regexp-fields.h       |  4 +++-
 test/T650-regexp-query.sh | 16 ++++++++++++++++
 4 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 5bc131a3..49b3849c 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -262,7 +262,8 @@ prefix_t prefix_table[] = {
     { "tag", "K", NOTMUCH_FIELD_EXTERNAL },
     { "is", "K", NOTMUCH_FIELD_EXTERNAL },
     { "id", "Q", NOTMUCH_FIELD_EXTERNAL },
-    { "mid", "Q", NOTMUCH_FIELD_EXTERNAL },
+    { "mid", "Q", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
     { "path", "P", NOTMUCH_FIELD_EXTERNAL },
     { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL },
     /*
@@ -313,7 +314,8 @@ _setup_query_field (const prefix_t *prefix, notmuch_database_t *notmuch)
  else if (STRNCMP_LITERAL(prefix->name, "query") == 0)
     fp = (new QueryFieldProcessor (*notmuch->query_parser, notmuch))->release ();
  else
-    fp = (new RegexpFieldProcessor (prefix->name, *notmuch->query_parser, notmuch))->release ();
+    fp = (new RegexpFieldProcessor (prefix->name, prefix->flags,
+    *notmuch->query_parser, notmuch))->release ();
 
  /* we treat all field-processor fields as boolean in order to get the raw input */
  notmuch->query_parser->add_boolean_prefix (prefix->name, fp);
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 1651677c..7ae55e70 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -135,13 +135,21 @@ static inline Xapian::valueno _find_slot (std::string prefix)
  return NOTMUCH_VALUE_FROM;
     else if (prefix == "subject")
  return NOTMUCH_VALUE_SUBJECT;
+    else if (prefix == "mid")
+ return NOTMUCH_VALUE_MESSAGE_ID;
     else
  throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
 }
 
-RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_)
- : slot (_find_slot (prefix)), term_prefix (_find_prefix (prefix.c_str ())),
-  parser (parser_), notmuch (notmuch_)
+RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
+    notmuch_field_flag_t options_,
+    Xapian::QueryParser &parser_,
+    notmuch_database_t *notmuch_)
+ : slot (_find_slot (prefix)),
+  term_prefix (_find_prefix (prefix.c_str ())),
+  options (options_),
+  parser (parser_),
+  notmuch (notmuch_)
 {
 };
 
@@ -161,16 +169,22 @@ RegexpFieldProcessor::operator() (const std::string & str)
     throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
  }
     } else {
- /* TODO replace this with a nicer API level triggering of
- * phrase parsing, when possible */
- std::string query_str;
+ if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+    /* TODO replace this with a nicer API level triggering of
+     * phrase parsing, when possible */
+    std::string query_str;
 
- if (str.find (' ') != std::string::npos)
-    query_str = '"' + str + '"';
- else
-    query_str = str;
+    if (str.find (' ') != std::string::npos)
+ query_str = '"' + str + '"';
+    else
+ query_str = str;
 
- return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+    return parser.parse_query (query_str, NOTMUCH_QUERY_PARSER_FLAGS, term_prefix);
+ } else {
+    /* Boolean prefix */
+    std::string term = term_prefix + str;
+    return Xapian::Query (term);
+ }
     }
 }
 #endif
diff --git a/lib/regexp-fields.h b/lib/regexp-fields.h
index a4ba7ad8..d5f93445 100644
--- a/lib/regexp-fields.h
+++ b/lib/regexp-fields.h
@@ -65,11 +65,13 @@ class RegexpFieldProcessor : public Xapian::FieldProcessor {
  protected:
     Xapian::valueno slot;
     std::string term_prefix;
+    notmuch_field_flag_t options;
     Xapian::QueryParser &parser;
     notmuch_database_t *notmuch;
 
  public:
-    RegexpFieldProcessor (std::string prefix, Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
+    RegexpFieldProcessor (std::string prefix, notmuch_field_flag_t options,
+  Xapian::QueryParser &parser_, notmuch_database_t *notmuch_);
 
     ~RegexpFieldProcessor () { };
 
diff --git a/test/T650-regexp-query.sh b/test/T650-regexp-query.sh
index 9599c104..27fc9ab9 100755
--- a/test/T650-regexp-query.sh
+++ b/test/T650-regexp-query.sh
@@ -104,4 +104,20 @@ Query string was: from:/unbalanced[/
 EOF
 test_expect_equal_file EXPECTED OUTPUT
 
+test_begin_subtest "empty mid search"
+notmuch search --output=messages mid:yoom > OUTPUT
+cp /dev/null EXPECTED
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "non-empty mid regex search"
+notmuch search --output=messages mid:/yoom/ > OUTPUT
+test_expect_equal_file cworth.msg-ids OUTPUT
+
+test_begin_subtest "combine regexp mid and subject"
+notmuch search  subject:/-C/ and mid:/y..m/ | notmuch_search_sanitize > OUTPUT
+cat <<EOF > EXPECTED
+thread:XXX   2009-11-18 [1/2] Carl Worth| Jan Janak; [notmuch] [PATCH] Older versions of install do not support -C. (inbox unread)
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
--
2.11.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] lib: Add regexp expansion for tags and paths

In reply to this post by David Bremner-2
From a UI perspective this looks similar to what was already provided
for from, subject, and mid, but the implementation is quite
different. It uses the database's list of terms to construct a
term-based query equivalent to the passed regular expression.
---
 lib/database.cc      | 12 ++++++++----
 lib/regexp-fields.cc | 31 +++++++++++++++++++++++++------
 2 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/lib/database.cc b/lib/database.cc
index 49b3849c..5b13f541 100644
--- a/lib/database.cc
+++ b/lib/database.cc
@@ -259,12 +259,15 @@ prefix_t prefix_table[] = {
     { "file-direntry", "XFDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
     { "directory-direntry", "XDDIRENTRY", NOTMUCH_FIELD_NO_FLAGS },
     { "thread", "G", NOTMUCH_FIELD_EXTERNAL },
-    { "tag", "K", NOTMUCH_FIELD_EXTERNAL },
-    { "is", "K", NOTMUCH_FIELD_EXTERNAL },
+    { "tag", "K", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
+    { "is", "K", NOTMUCH_FIELD_EXTERNAL |
+        NOTMUCH_FIELD_PROCESSOR },
     { "id", "Q", NOTMUCH_FIELD_EXTERNAL },
     { "mid", "Q", NOTMUCH_FIELD_EXTERNAL |
  NOTMUCH_FIELD_PROCESSOR },
-    { "path", "P", NOTMUCH_FIELD_EXTERNAL },
+    { "path", "P", NOTMUCH_FIELD_EXTERNAL|
+ NOTMUCH_FIELD_PROCESSOR },
     { "property", "XPROPERTY", NOTMUCH_FIELD_EXTERNAL },
     /*
      * Unconditionally add ':' to reduce potential ambiguity with
@@ -272,7 +275,8 @@ prefix_t prefix_table[] = {
      * letters. See Xapian document termprefixes.html for related
      * discussion.
      */
-    { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL },
+    { "folder", "XFOLDER:", NOTMUCH_FIELD_EXTERNAL |
+ NOTMUCH_FIELD_PROCESSOR },
 #if HAVE_XAPIAN_FIELD_PROCESSOR
     { "date", NULL, NOTMUCH_FIELD_EXTERNAL |
  NOTMUCH_FIELD_PROCESSOR },
diff --git a/lib/regexp-fields.cc b/lib/regexp-fields.cc
index 7ae55e70..1598c17f 100644
--- a/lib/regexp-fields.cc
+++ b/lib/regexp-fields.cc
@@ -138,7 +138,7 @@ static inline Xapian::valueno _find_slot (std::string prefix)
     else if (prefix == "mid")
  return NOTMUCH_VALUE_MESSAGE_ID;
     else
- throw Xapian::QueryParserError ("unsupported regexp field '" + prefix + "'");
+ return Xapian::BAD_VALUENO;
 }
 
 RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
@@ -156,15 +156,34 @@ RegexpFieldProcessor::RegexpFieldProcessor (std::string prefix,
 Xapian::Query
 RegexpFieldProcessor::operator() (const std::string & str)
 {
-    if (str.size () == 0)
- return Xapian::Query(Xapian::Query::OP_AND_NOT,
+    if (str.empty ()) {
+ if (options & NOTMUCH_FIELD_PROBABILISTIC) {
+    return Xapian::Query(Xapian::Query::OP_AND_NOT,
      Xapian::Query::MatchAll,
      Xapian::Query (Xapian::Query::OP_WILDCARD, term_prefix));
+ } else {
+    return Xapian::Query (term_prefix);
+ }
+    }
 
     if (str.at (0) == '/') {
- if (str.at (str.size () - 1) == '/'){
-    RegexpPostingSource *postings = new RegexpPostingSource (slot, str.substr(1,str.size () - 2));
-    return Xapian::Query (postings->release ());
+ if (str.length() > 1 && str.at (str.size () - 1) == '/'){
+    std::string regexp_str = str.substr(1,str.size () - 2);
+    if (slot != Xapian::BAD_VALUENO) {
+ RegexpPostingSource *postings = new RegexpPostingSource (slot, regexp_str);
+ return Xapian::Query (postings->release ());
+    } else {
+ std::vector<std::string> terms;
+ regex_t regexp;
+
+ compile_regex(regexp, regexp_str.c_str ());
+ for (Xapian::TermIterator it = notmuch->xapian_db->allterms_begin (term_prefix);
+     it != notmuch->xapian_db->allterms_end (); ++it) {
+    if (regexec (&regexp, (*it).c_str (), 0, NULL, 0) == 0)
+ terms.push_back(*it);
+ }
+ return Xapian::Query (Xapian::Query::OP_OR, terms.begin(), terms.end());
+    }
  } else {
     throw Xapian::QueryParserError ("unmatched regex delimiter in '" + str + "'");
  }
--
2.11.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch