v1: fix for threading of replies to ghost messages

classic Classic list List threaded Threaded
13 messages Options
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

v1: fix for threading of replies to ghost messages

See the thread at id:[hidden email] for discussion.

This series finally bites the bullet and uses the References header
for threading.

Please test this series, there are lots of tricky cases with threading.


_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/9] util: add DEBUG_PRINTF, rename error_util.h -> debug_print.h

It seemed silly to have two headers that were almost copies.
The new macro is intended for debugging, and probably should not be
left in released code.
---
 command-line-arguments.c             |  2 +-
 lib/notmuch-private.h                |  2 +-
 util/Makefile.local                  |  4 +--
 util/debug_print.c                   | 26 ++++++++++++++++++
 util/{error_util.h => debug_print.h} | 22 ++++++++++++---
 util/error_util.c                    | 40 ----------------------------
 util/hex-escape.c                    |  2 +-
 util/util.c                          |  2 +-
 util/xutil.c                         |  2 +-
 9 files changed, 51 insertions(+), 51 deletions(-)
 create mode 100644 util/debug_print.c
 rename util/{error_util.h => debug_print.h} (75%)
 delete mode 100644 util/error_util.c

diff --git a/command-line-arguments.c b/command-line-arguments.c
index d64aa85b..90d69453 100644
--- a/command-line-arguments.c
+++ b/command-line-arguments.c
@@ -1,7 +1,7 @@
 #include <assert.h>
 #include <string.h>
 #include <stdio.h>
-#include "error_util.h"
+#include "debug_print.h"
 #include "command-line-arguments.h"
 
 typedef enum {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 3764a6a9..499d73d4 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -50,7 +50,7 @@ NOTMUCH_BEGIN_DECLS
 #include "gmime-extra.h"
 
 #include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
 #include "string-util.h"
 #include "crypto.h"
 
diff --git a/util/Makefile.local b/util/Makefile.local
index ba03230e..ccf1bd79 100644
--- a/util/Makefile.local
+++ b/util/Makefile.local
@@ -3,9 +3,9 @@
 dir := util
 extra_cflags += -I$(srcdir)/$(dir)
 
-libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/error_util.c $(dir)/hex-escape.c \
+libnotmuch_util_c_srcs := $(dir)/xutil.c $(dir)/hex-escape.c \
   $(dir)/string-util.c $(dir)/talloc-extra.c $(dir)/zlib-extra.c \
- $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c
+ $(dir)/util.c $(dir)/gmime-extra.c $(dir)/crypto.c $(dir)/debug_print.c
 
 libnotmuch_util_modules := $(libnotmuch_util_c_srcs:.c=.o)
 
diff --git a/util/debug_print.c b/util/debug_print.c
new file mode 100644
index 00000000..9f00f69b
--- /dev/null
+++ b/util/debug_print.c
@@ -0,0 +1,26 @@
+#include "debug_print.h"
+
+void
+_debug_printf (const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+    vfprintf (stderr, format, va_args);
+    va_end (va_args);
+    
+}
+
+void
+_internal_error (const char *format, ...)
+{
+    va_list va_args;
+
+    va_start (va_args, format);
+
+    fprintf (stderr, "Internal error: ");
+    vfprintf (stderr, format, va_args);
+
+    va_end (va_args);
+    exit (1);
+}
diff --git a/util/error_util.h b/util/debug_print.h
similarity index 75%
rename from util/error_util.h
rename to util/debug_print.h
index 4bb338a2..4a096945 100644
--- a/util/error_util.h
+++ b/util/debug_print.h
@@ -1,6 +1,7 @@
-/* error_util.h - Provide the INTERNAL_ERROR macro
+/* debug_print.h - Provide the INTERNAL_ERROR and DEBUG_PRINTF macros
  *
  * Copyright © 2009 Carl Worth
+ * Copyright © 2018 David Bremner
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -18,13 +19,26 @@
  * Author: Carl Worth <[hidden email]>
  */
 
-#ifndef ERROR_UTIL_H
-#define ERROR_UTIL_H
+#ifndef DEBUG_PRINT_H
+#define DEBUG_PRINT_H
 
 #include <talloc.h>
-
 #include "function-attributes.h"
 
+void
+_debug_printf (const char *format, ...) PRINTF_ATTRIBUTE (1, 2);
+
+/*
+ * Provide a quick way to silence debugging output.
+ */
+
+#ifdef DEBUG_PRINT
+#define DEBUG_PRINTF(format, ...) _debug_printf(format " (%s).\n", \
+ ##__VA_ARGS__, __location__)
+#else
+#define DEBUG_PRINTF(format, ...) /* ignored */
+#endif
+
 /* There's no point in continuing when we've detected that we've done
  * something wrong internally (as opposed to the user passing in a
  * bogus value).
diff --git a/util/error_util.c b/util/error_util.c
deleted file mode 100644
index e64162c7..00000000
--- a/util/error_util.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/* error_util.c - internal error utilities for notmuch.
- *
- * Copyright © 2009 Carl Worth
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see https://www.gnu.org/licenses/ .
- *
- * Author: Carl Worth <[hidden email]>
- */
-
-#include <stdlib.h>
-#include <stdarg.h>
-#include <stdio.h>
-
-#include "error_util.h"
-
-void
-_internal_error (const char *format, ...)
-{
-    va_list va_args;
-
-    va_start (va_args, format);
-
-    fprintf (stderr, "Internal error: ");
-    vfprintf (stderr, format, va_args);
-
-    va_end (va_args);
-    exit (1);
-}
-
diff --git a/util/hex-escape.c b/util/hex-escape.c
index 8883ff90..ff20c702 100644
--- a/util/hex-escape.c
+++ b/util/hex-escape.c
@@ -22,7 +22,7 @@
 #include <string.h>
 #include <talloc.h>
 #include <ctype.h>
-#include "error_util.h"
+#include "debug_print.h"
 #include "hex-escape.h"
 
 static const char *output_charset =
diff --git a/util/util.c b/util/util.c
index 06659b35..75df0ead 100644
--- a/util/util.c
+++ b/util/util.c
@@ -1,5 +1,5 @@
 #include "util.h"
-#include "error_util.h"
+#include "debug_print.h"
 #include <string.h>
 #include <errno.h>
 
diff --git a/util/xutil.c b/util/xutil.c
index f211eaaa..11042c9e 100644
--- a/util/xutil.c
+++ b/util/xutil.c
@@ -22,7 +22,7 @@
 #include <string.h>
 
 #include "xutil.h"
-#include "error_util.h"
+#include "debug_print.h"
 
 void *
 xcalloc (size_t nmemb, size_t size)
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/9] test: start threading test corpus

In reply to this post by David Bremner-2
The first set of messages in this corpus contains a reply to a
non-existent message, which causes it to be mistakenly classified as a
toplevel message in the thread.
---
 test/corpora/threading/ghost-root/child             | 9 +++++++++
 test/corpora/threading/ghost-root/fake-root         | 9 +++++++++
 test/corpora/threading/ghost-root/grand-child       | 9 +++++++++
 test/corpora/threading/ghost-root/grand-child2      | 9 +++++++++
 test/corpora/threading/ghost-root/great-grand-child | 9 +++++++++
 test/corpora/threading/ghost-root/real-root         | 7 +++++++
 6 files changed, 52 insertions(+)
 create mode 100644 test/corpora/threading/ghost-root/child
 create mode 100644 test/corpora/threading/ghost-root/fake-root
 create mode 100644 test/corpora/threading/ghost-root/grand-child
 create mode 100644 test/corpora/threading/ghost-root/grand-child2
 create mode 100644 test/corpora/threading/ghost-root/great-grand-child
 create mode 100644 test/corpora/threading/ghost-root/real-root

diff --git a/test/corpora/threading/ghost-root/child b/test/corpora/threading/ghost-root/child
new file mode 100644
index 00000000..4c36af95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/child
@@ -0,0 +1,9 @@
+From: Alice <[hidden email]>
+To: Daniel <[hidden email]>
+Subject: child message
+Message-ID: <[hidden email]>
+In-Reply-To: <[hidden email]>
+References:  <[hidden email]>
+Date: Fri, 17 Jun 2016 22:14:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
new file mode 100644
index 00000000..102bb228
--- /dev/null
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -0,0 +1,9 @@
+From: Mallory <[hidden email]>
+To: Daniel <[hidden email]>
+Subject: fake root message
+Message-ID: <[hidden email]>
+In-Reply-to: <[hidden email]>
+References: <[hidden email]> <[hidden email]> <[hidden email]>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
diff --git a/test/corpora/threading/ghost-root/grand-child b/test/corpora/threading/ghost-root/grand-child
new file mode 100644
index 00000000..5f77ac36
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child
@@ -0,0 +1,9 @@
+From: Alice <[hidden email]>
+To: Daniel <[hidden email]>
+Subject: grand-child message
+Message-ID: <[hidden email]>
+In-Reply-To: <[hidden email]>
+References:  <[hidden email]> <[hidden email]>
+Date: Fri, 17 Jun 2016 22:24:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/grand-child2 b/test/corpora/threading/ghost-root/grand-child2
new file mode 100644
index 00000000..59682a95
--- /dev/null
+++ b/test/corpora/threading/ghost-root/grand-child2
@@ -0,0 +1,9 @@
+From: Daniel <[hidden email]>
+To: Alice <[hidden email]>
+Subject: grand-child message 2
+Message-ID: <[hidden email]>
+In-Reply-To: <[hidden email]>
+References:  <[hidden email]> <[hidden email]>
+Date: Fri, 17 Jun 2016 22:34:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/great-grand-child b/test/corpora/threading/ghost-root/great-grand-child
new file mode 100644
index 00000000..287a8954
--- /dev/null
+++ b/test/corpora/threading/ghost-root/great-grand-child
@@ -0,0 +1,9 @@
+From: Alice <[hidden email]>
+To: Daniel <[hidden email]>
+Subject: great grand-child message
+Message-ID: <[hidden email]>
+In-Reply-To: <[hidden email]>
+References:  <[hidden email]> <[hidden email]>
+Date: Fri, 17 Jun 2016 22:44:41 -0400
+
+
diff --git a/test/corpora/threading/ghost-root/real-root b/test/corpora/threading/ghost-root/real-root
new file mode 100644
index 00000000..97c9a6c4
--- /dev/null
+++ b/test/corpora/threading/ghost-root/real-root
@@ -0,0 +1,7 @@
+From: Alice <[hidden email]>
+To: Daniel <[hidden email]>
+Subject: root message
+Message-ID: <[hidden email]>
+Date: Thu, 16 Jun 2016 22:14:41 -0400
+
+This message has no reply-to
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 3/9] test: add known broken tests for "ghost roots"

In reply to this post by David Bremner-2
This documents the bug discussed at

     id:[hidden email]

The underlying issue is that the reply to a ghost (missing) message is
falsely classified as a root message in _resolve_thread_relationships.

The first test is simpler / more robust, but also easier to fool.
---
 test/T260-thread-order.sh                   | 27 +++++++++++++++++++++
 test/corpora/threading/ghost-root/fake-root |  2 +-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index fea61275..ce8636b9 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -75,4 +75,31 @@ $(for ((i = 0; i < $nthreads; i++)); do
 done
 test_expect_equal "$output" "$expected"
 
+add_email_corpus threading
+
+test_begin_subtest "reply to ghost"
+test_subtest_known_broken
+notmuch show --entire-thread=true id:[hidden email] | grep ^Subject: | head -1  > OUTPUT
+cat <<EOF > EXPECTED
+Subject: root message
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
+test_begin_subtest "reply to ghost (tree view)"
+test_subtest_known_broken
+test_emacs '(notmuch-tree "tag:inbox")
+    (notmuch-test-wait)
+    (test-output)
+    (delete-other-windows)'
+cat <<EOF > EXPECTED
+  2016-06-17  Alice                 ┬►root message                                        (inbox unread)
+  2016-06-18  Alice                 ╰┬►child message                                      (inbox unread)
+  2016-06-18  Alice                  ├┬►grand-child message                               (inbox unread)
+  2016-06-18  Alice                  │╰─►great grand-child message                        (inbox unread)
+  2016-06-18  Daniel                 ├─►grand-child message 2                             (inbox unread)
+  2016-06-17  Mallory                ╰─►fake root message                                 (inbox unread)
+End of search results.
+EOF
+test_expect_equal_file EXPECTED OUTPUT
+
 test_done
diff --git a/test/corpora/threading/ghost-root/fake-root b/test/corpora/threading/ghost-root/fake-root
index 102bb228..5be228fd 100644
--- a/test/corpora/threading/ghost-root/fake-root
+++ b/test/corpora/threading/ghost-root/fake-root
@@ -6,4 +6,4 @@ In-Reply-to: <[hidden email]>
 References: <[hidden email]> <[hidden email]> <[hidden email]>
 Date: Thu, 16 Jun 2016 22:14:41 -0400
 
-This message has no reply-to
+This message has a reply-to to a non-existent message
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 4/9] lib: read reference terms into message struct.

In reply to this post by David Bremner-2
The plan is to use these in resolving threads.
---
 lib/message.cc        | 17 +++++++++++++++++
 lib/notmuch-private.h |  3 +++
 2 files changed, 20 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 153e4bed..790d26f5 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -41,6 +41,7 @@ struct _notmuch_message {
     notmuch_message_file_t *message_file;
     notmuch_string_list_t *property_term_list;
     notmuch_string_map_t *property_map;
+    notmuch_string_list_t *reference_list;
     notmuch_message_list_t *replies;
     unsigned long flags;
     /* For flags that are initialized on-demand, lazy_flags indicates
@@ -129,6 +130,7 @@ _notmuch_message_create_for_document (const void *talloc_owner,
     message->author = NULL;
     message->property_term_list = NULL;
     message->property_map = NULL;
+    message->reference_list = NULL;
 
     message->replies = _notmuch_message_list_create (message);
     if (unlikely (message->replies == NULL)) {
@@ -349,6 +351,7 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
  *type_prefix = _find_prefix ("type"),
  *filename_prefix = _find_prefix ("file-direntry"),
  *property_prefix = _find_prefix ("property"),
+ *reference_prefix = _find_prefix ("reference"),
  *replyto_prefix = _find_prefix ("replyto");
 
     /* We do this all in a single pass because Xapian decompresses the
@@ -413,6 +416,14 @@ _notmuch_message_ensure_metadata (notmuch_message_t *message, void *field)
     _notmuch_database_get_terms_with_prefix (message, i, end,
  property_prefix);
 
+    /* get references */
+    assert (strcmp (property_prefix, reference_prefix) < 0);
+    if (!message->reference_list) {
+ message->reference_list =
+    _notmuch_database_get_terms_with_prefix (message, i, end,
+     reference_prefix);
+    }
+
     /* Get reply to */
     assert (strcmp (property_prefix, replyto_prefix) < 0);
     if (!message->in_reply_to)
@@ -588,6 +599,12 @@ _notmuch_message_add_reply (notmuch_message_t *message,
     _notmuch_message_list_add_message (message->replies, reply);
 }
 
+const notmuch_string_list_t *
+_notmuch_message_get_references (notmuch_message_t *message)
+{
+    return message->reference_list;
+}
+
 notmuch_messages_t *
 notmuch_message_get_replies (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index 499d73d4..e7bbc156 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -580,6 +580,9 @@ _notmuch_string_list_append (notmuch_string_list_t *list,
 void
 _notmuch_string_list_sort (notmuch_string_list_t *list);
 
+const notmuch_string_list_t *
+_notmuch_message_get_references(notmuch_message_t *message);
+
 /* string-map.c */
 typedef struct _notmuch_string_map  notmuch_string_map_t;
 typedef struct _notmuch_string_map_iterator notmuch_string_map_iterator_t;
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 5/9] lib/thread: refactor in-reply-to test.

In reply to this post by David Bremner-2
This is not a complete win in code-size, but it makes the code (which
is about to get more complicated) easier to follow
---
 lib/thread.cc | 40 +++++++++++++++++++++++-----------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index e961c76b..93508359 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -387,12 +387,30 @@ _thread_add_matched_message (notmuch_thread_t *thread,
     _thread_add_matched_author (thread, _notmuch_message_get_author (hashed_message));
 }
 
+static bool
+_parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
+    notmuch_message_t *parent;
+    const char *in_reply_to;
+
+    in_reply_to = _notmuch_message_get_in_reply_to (message);
+    DEBUG_PRINTF("checking in_reply_to=%s\n", in_reply_to);
+
+    if (in_reply_to && strlen (in_reply_to) &&
+ g_hash_table_lookup_extended (thread->message_hash,
+      in_reply_to, NULL,
+      (void **) &parent)) {
+ _notmuch_message_add_reply (parent, message);
+ return true;
+    } else {
+ return false;
+    }
+}
+
 static void
 _resolve_thread_relationships (notmuch_thread_t *thread)
 {
     notmuch_message_node_t *node, *first_node;
-    notmuch_message_t *message, *parent;
-    const char *in_reply_to;
+    notmuch_message_t *message;
 
     first_node = thread->message_list->head;
     if (! first_node)
@@ -400,13 +418,7 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
 
     for (node = first_node->next; node; node = node->next) {
  message = node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (in_reply_to && strlen (in_reply_to) &&
-    g_hash_table_lookup_extended (thread->message_hash,
-  in_reply_to, NULL,
-  (void **) &parent))
-    _notmuch_message_add_reply (parent, message);
- else
+ if (! _parent_via_in_reply_to (thread, message))
     _notmuch_message_list_add_message (thread->toplevel_list, message);
     }
 
@@ -418,14 +430,8 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
      */
     if (first_node) {
  message = first_node->message;
- in_reply_to = _notmuch_message_get_in_reply_to (message);
- if (thread->toplevel_list->head &&
-    in_reply_to && strlen (in_reply_to) &&
-    g_hash_table_lookup_extended (thread->message_hash,
-  in_reply_to, NULL,
-  (void **) &parent))
-    _notmuch_message_add_reply (parent, message);
- else
+ if (! thread->toplevel_list->head ||
+    ! _parent_via_in_reply_to (thread, message))
     _notmuch_message_list_add_message (thread->toplevel_list, message);
     }
 
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 6/9] lib: initial fix for "ghost replyto"

In reply to this post by David Bremner-2
This fixes the failing test of _resolve_thread_relationships
introduced above, but will probably place messages in-reply-to
ghosts (i.e. in-reply-to missing messages) too far up in the thread
in more complicated examples. In particular it does not follow the
suggestion in the XXX: comment to choose the deepest parent.
---
 lib/thread.cc             | 53 ++++++++++++++++++++++++++++++++++-----
 test/T260-thread-order.sh |  1 -
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 93508359..417235ea 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -406,22 +406,52 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
     }
 }
 
+static void
+_parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
+{
+    bool found = false;
+    notmuch_message_t *parent = NULL;
+    const notmuch_string_list_t *references =
+ _notmuch_message_get_references (message);
+    for (notmuch_string_node_t *ref_node = references->head;
+ ! found && ref_node; ref_node = ref_node->next) {
+ if ((found = g_hash_table_lookup_extended (thread->message_hash,
+   ref_node->string, NULL,
+   (void **) &parent))) {
+    _notmuch_message_add_reply (parent, message);
+ }
+    }
+    if (! found)
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+}
+
 static void
 _resolve_thread_relationships (notmuch_thread_t *thread)
 {
     notmuch_message_node_t *node, *first_node;
     notmuch_message_t *message;
+    void *local;
+    notmuch_message_list_t *maybe_toplevel_list;
 
     first_node = thread->message_list->head;
     if (! first_node)
  return;
 
+    local = talloc_new (thread);
+    maybe_toplevel_list = _notmuch_message_list_create (local);
+
     for (node = first_node->next; node; node = node->next) {
  message = node->message;
  if (! _parent_via_in_reply_to (thread, message))
-    _notmuch_message_list_add_message (thread->toplevel_list, message);
+    _notmuch_message_list_add_message (maybe_toplevel_list, message);
     }
 
+    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ _parent_or_toplevel (thread, message);
+    }
     /*
      * if we reach the end of the list without finding a top-level
      * message, that means the thread is a cycle (or set of cycles)
@@ -431,20 +461,31 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
     if (first_node) {
  message = first_node->message;
  if (! thread->toplevel_list->head ||
-    ! _parent_via_in_reply_to (thread, message))
-    _notmuch_message_list_add_message (thread->toplevel_list, message);
+    ! _parent_via_in_reply_to (thread, message)) {
+    /*
+     * If the oldest message happens to be in-reply-to a
+     * missing message, we only check for references if there
+     * is some other candidate for root message.
+     */
+    if (thread->toplevel_list->head)
+ _parent_or_toplevel (thread, message);
+    else
+ _notmuch_message_list_add_message (thread->toplevel_list, message);
+ }
     }
 
     /* XXX: After scanning through the entire list looking for parents
      * via "In-Reply-To", we should do a second pass that looks at the
-     * list of messages IDs in the "References" header instead. (And
-     * for this the parent would be the "deepest" message of all the
-     * messages found in the "References" list.)
+     * list of messages IDs in the "References" header instead.
+     * Unlike the current quick fix, the parent should be the
+     * "deepest" message of all the messages found in the "References"
+     * list.
      *
      * Doing this will allow messages and sub-threads to be positioned
      * correctly in the thread even when an intermediate message is
      * missing from the thread.
      */
+    talloc_free (local);
 }
 
 /* Create a new notmuch_thread_t object by finding the thread
diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index ce8636b9..9565f296 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -78,7 +78,6 @@ test_expect_equal "$output" "$expected"
 add_email_corpus threading
 
 test_begin_subtest "reply to ghost"
-test_subtest_known_broken
 notmuch show --entire-thread=true id:[hidden email] | grep ^Subject: | head -1  > OUTPUT
 cat <<EOF > EXPECTED
 Subject: root message
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 7/9] lib: calculate message depth in thread

In reply to this post by David Bremner-2
This will be used in reparenting messages without useful in-reply-to,
but with useful references
---
 lib/message.cc        | 23 +++++++++++++++++++++++
 lib/notmuch-private.h |  5 +++++
 2 files changed, 28 insertions(+)

diff --git a/lib/message.cc b/lib/message.cc
index 790d26f5..1567f14b 100644
--- a/lib/message.cc
+++ b/lib/message.cc
@@ -32,6 +32,7 @@ struct _notmuch_message {
     int frozen;
     char *message_id;
     char *thread_id;
+    size_t thread_depth;
     char *in_reply_to;
     notmuch_string_list_t *tag_list;
     notmuch_string_list_t *filename_term_list;
@@ -118,6 +119,9 @@ _notmuch_message_create_for_document (const void *talloc_owner,
     /* the message is initially not synchronized with Xapian */
     message->last_view = 0;
 
+    /* Calculated after the thread structure is computed */
+    message->thread_depth = 0;
+
     /* Each of these will be lazily created as needed. */
     message->message_id = NULL;
     message->thread_id = NULL;
@@ -599,6 +603,25 @@ _notmuch_message_add_reply (notmuch_message_t *message,
     _notmuch_message_list_add_message (message->replies, reply);
 }
 
+size_t
+_notmuch_message_get_thread_depth (notmuch_message_t *message) {
+    return message->thread_depth;
+}
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+       size_t depth)
+{
+    message->thread_depth = depth;
+
+    for (notmuch_messages_t *messages = _notmuch_messages_create (message->replies);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages)) {
+ notmuch_message_t *child = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (child, depth+1);
+    }
+}
+
 const notmuch_string_list_t *
 _notmuch_message_get_references (notmuch_message_t *message)
 {
diff --git a/lib/notmuch-private.h b/lib/notmuch-private.h
index e7bbc156..385cf7a5 100644
--- a/lib/notmuch-private.h
+++ b/lib/notmuch-private.h
@@ -539,6 +539,11 @@ _notmuch_message_remove_unprefixed_terms (notmuch_message_t *message);
 const char *
 _notmuch_message_get_thread_id_only(notmuch_message_t *message);
 
+size_t _notmuch_message_get_thread_depth (notmuch_message_t *message);
+
+void
+_notmuch_message_label_depths (notmuch_message_t *message,
+       size_t depth);
 /* sha1.c */
 
 char *
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 8/9] lib/thread: rewrite _parent_or_toplevel to use depths

In reply to this post by David Bremner-2
This is part 1/2 of changing the reparenting of alleged toplevel
messages to use a "deep" reference rather than just the first one
found.
---
 lib/thread.cc | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 417235ea..2eed724b 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -409,20 +409,40 @@ _parent_via_in_reply_to (notmuch_thread_t *thread, notmuch_message_t *message) {
 static void
 _parent_or_toplevel (notmuch_thread_t *thread, notmuch_message_t *message)
 {
-    bool found = false;
+    size_t max_depth = 0;
+    notmuch_message_t *new_parent;
     notmuch_message_t *parent = NULL;
     const notmuch_string_list_t *references =
  _notmuch_message_get_references (message);
+
+    DEBUG_PRINTF("trying to reparent via references: %s\n",
+     notmuch_message_get_message_id (message));
+
     for (notmuch_string_node_t *ref_node = references->head;
- ! found && ref_node; ref_node = ref_node->next) {
- if ((found = g_hash_table_lookup_extended (thread->message_hash,
-   ref_node->string, NULL,
-   (void **) &parent))) {
-    _notmuch_message_add_reply (parent, message);
+ ref_node; ref_node = ref_node->next) {
+ DEBUG_PRINTF("checking reference=%s\n", ref_node->string);
+ if ((g_hash_table_lookup_extended (thread->message_hash,
+   ref_node->string, NULL,
+   (void **) &new_parent))) {
+    size_t new_depth = _notmuch_message_get_thread_depth (new_parent);
+    DEBUG_PRINTF("got depth %lu\n", new_depth);
+    if (new_depth > max_depth || !parent) {
+ DEBUG_PRINTF("adding at depth %lu parent=%s\n", new_depth, ref_node->string);
+ max_depth = new_depth;
+ parent = new_parent;
+    }
  }
     }
-    if (! found)
+    if (parent) {
+ DEBUG_PRINTF("adding reply %s to parent=%s\n",
+ notmuch_message_get_message_id (message),
+ notmuch_message_get_message_id (parent));
+ _notmuch_message_add_reply (parent, message);
+    } else {
+ DEBUG_PRINTF("adding as toplevel %s\n",
+ notmuch_message_get_message_id (message));
  _notmuch_message_list_add_message (thread->toplevel_list, message);
+    }
 }
 
 static void
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 9/9] lib/thread: change _resolve_thread_relationships to use depths

In reply to this post by David Bremner-2
We (finally) implement the XXX comment. It requires a bit of care not
to reparent all of the possible toplevel messages. There is some
slightly naughty low level access to the list structures here that can
be cleaned up in a later commit.
---
 lib/thread.cc             | 53 +++++++++++++++++++--------------------
 test/T260-thread-order.sh |  1 -
 2 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/lib/thread.cc b/lib/thread.cc
index 2eed724b..6a567caa 100644
--- a/lib/thread.cc
+++ b/lib/thread.cc
@@ -466,12 +466,6 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
     _notmuch_message_list_add_message (maybe_toplevel_list, message);
     }
 
-    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
- notmuch_messages_valid (roots);
- notmuch_messages_move_to_next (roots)) {
- notmuch_message_t *message = notmuch_messages_get (roots);
- _parent_or_toplevel (thread, message);
-    }
     /*
      * if we reach the end of the list without finding a top-level
      * message, that means the thread is a cycle (or set of cycles)
@@ -480,31 +474,36 @@ _resolve_thread_relationships (notmuch_thread_t *thread)
      */
     if (first_node) {
  message = first_node->message;
- if (! thread->toplevel_list->head ||
+ DEBUG_PRINTF("checking first message  %s\n",
+     notmuch_message_get_message_id (message));
+
+ if (! maybe_toplevel_list->head ||
     ! _parent_via_in_reply_to (thread, message)) {
-    /*
-     * If the oldest message happens to be in-reply-to a
-     * missing message, we only check for references if there
-     * is some other candidate for root message.
-     */
-    if (thread->toplevel_list->head)
- _parent_or_toplevel (thread, message);
-    else
- _notmuch_message_list_add_message (thread->toplevel_list, message);
+    DEBUG_PRINTF("adding first message as toplevel = %s\n",
+     notmuch_message_get_message_id (message));
+    _notmuch_message_list_add_message (maybe_toplevel_list, message);
  }
     }
 
-    /* XXX: After scanning through the entire list looking for parents
-     * via "In-Reply-To", we should do a second pass that looks at the
-     * list of messages IDs in the "References" header instead.
-     * Unlike the current quick fix, the parent should be the
-     * "deepest" message of all the messages found in the "References"
-     * list.
-     *
-     * Doing this will allow messages and sub-threads to be positioned
-     * correctly in the thread even when an intermediate message is
-     * missing from the thread.
-     */
+    for (notmuch_messages_t *messages = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (messages);
+ notmuch_messages_move_to_next (messages))
+    {
+ notmuch_message_t *message = notmuch_messages_get (messages);
+ _notmuch_message_label_depths (message, 0);
+    }
+
+    for (notmuch_messages_t *roots = _notmuch_messages_create (maybe_toplevel_list);
+ notmuch_messages_valid (roots);
+ notmuch_messages_move_to_next (roots)) {
+ notmuch_message_t *message = notmuch_messages_get (roots);
+ /* XXX add _notmuch_messages_count */
+ if (roots->iterator->next || thread->toplevel_list->head)
+    _parent_or_toplevel (thread, message);
+ else
+    _notmuch_message_list_add_message (thread->toplevel_list, message);
+    }
+
     talloc_free (local);
 }
 
diff --git a/test/T260-thread-order.sh b/test/T260-thread-order.sh
index 9565f296..0dd985c2 100755
--- a/test/T260-thread-order.sh
+++ b/test/T260-thread-order.sh
@@ -85,7 +85,6 @@ EOF
 test_expect_equal_file EXPECTED OUTPUT
 
 test_begin_subtest "reply to ghost (tree view)"
-test_subtest_known_broken
 test_emacs '(notmuch-tree "tag:inbox")
     (notmuch-test-wait)
     (test-output)
--
2.18.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
Gregor Zattler Gregor Zattler
Reply | Threaded
Open this post in threaded view
|

Re: v1: fix for threading of replies to ghost messages

In reply to this post by David Bremner-2
Hi David,
* David Bremner <[hidden email]> [2018-07-21; 08:37]:
> See the thread at id:[hidden email] for discussion.
>
> This series finally bites the bullet and uses the References header
> for threading.
>
> Please test this series, there are lots of tricky cases with threading.

I applied the patches and looked with notmuch-emacs for threads
which before would probably show wrong ordering of messages.  The
half a dozen threads were all shown in the correct order with
respect to parents and children, but I found from those not
disclosable RT ticket mails (I mentioned in above referenced
thread):

- Threads where the "Resolved" message was shown first but it's
  References: header contains only a fake Message-Id: (which is
  strange: why does notmuch show this resolved-mail in this
  thread?).  


- one thread where two children of one message were shown with
  correct (meaning: same) indentation but the older one before
  (over) the newer one.


I'll ask if I may disclose this threads if this it would be
helpful for diagnosis.


Thanks for your attention and work regarding my problem report
with notmuch.

Ciao; Gregor

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Re: v1: fix for threading of replies to ghost messages

Gregor Zattler <[hidden email]> writes:

>
> I applied the patches and looked with notmuch-emacs for threads
> which before would probably show wrong ordering of messages.  The
> half a dozen threads were all shown in the correct order with
> respect to parents and children, but I found from those not
> disclosable RT ticket mails (I mentioned in above referenced
> thread):

Can you also try the series at

    id:[hidden email]?

I'm not sure if it's related, but it does fix replies for a different
bug tracker. You will have to reindex the messages in question
(e.g. using notmuch reindex).
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
Gregor Zattler Gregor Zattler
Reply | Threaded
Open this post in threaded view
|

Re: v1: fix for threading of replies to ghost messages

Hi David,
* David Bremner <[hidden email]> [2018-07-23; 16:41]:

> Gregor Zattler <[hidden email]> writes:
>> I applied the patches and looked with notmuch-emacs for threads
>> which before would probably show wrong ordering of messages.  The
>> half a dozen threads were all shown in the correct order with
>> respect to parents and children, but I found from those not
>> disclosable RT ticket mails (I mentioned in above referenced
>> thread):
>
> Can you also try the series at
>
>     id:[hidden email]?
>
> I'm not sure if it's related, but it does fix replies for a different
> bug tracker. You will have to reindex the messages in question
> (e.g. using notmuch reindex).

I applied this patch set on top of the other one, copied
the relevant threads to a test-maildir, changed path in
.notmuch-config, thus building a new index: Same order of
messages in notmuch-emacs-show as with the first patch set only.

I asked for permission to disclose two small threads, have to
wait for answer, will report back


Thanks again; Gregor
--
 -... --- .-. . -.. ..--.. ...-.-

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch