Proposed New sort API

classic Classic list List threaded Threaded
10 messages Options
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Proposed New sort API

I started looking at William's sorting patches [1], but the
proliferation of sorting options bugged me a bit. I decided to sketch
out a new more flexible API.

In the new API, there is is a sort "key", currently mapped one-to-one
to value slots, but potentially could do more sophisticated things
like making keys up dynamically (e.g. preprocessing subjects).

The second parameter is a sort "type". Currently this is just
ascending or descending, but other potential options include
sort_by_relevance_then_value

[1]: id:[hidden email]
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] WIP: groundwork for new sorting API

---
 lib/notmuch.h | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/query.cc  | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)

diff --git a/lib/notmuch.h b/lib/notmuch.h
index 39759b7a..ae592e93 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -773,6 +773,10 @@ notmuch_query_create (notmuch_database_t *database,
  * Sort values for notmuch_query_set_sort.
  */
 typedef enum {
+    /**
+     * Value was not set
+     */
+    NOTMUCH_SORT_UNSET = -1,
     /**
      * Oldest first.
      */
@@ -791,6 +795,42 @@ typedef enum {
     NOTMUCH_SORT_UNSORTED
 } notmuch_sort_t;
 
+/**
+ * Sort key values for notmuch_query_set_sort_key
+ */
+typedef enum {
+    /**
+     * Do not sort.
+     */
+    NOTMUCH_SORT_KEY_NONE=0,
+    /**
+     * Sort by timestamp (from Date: header)
+     */
+    NOTMUCH_SORT_KEY_TIMESTAMP,
+    /**
+     * Sort by message-id.
+     */
+    NOTMUCH_SORT_KEY_MESSAGE_ID,
+} notmuch_sort_key_t;
+
+/**
+ * Sort type values for notmuch_query_set_sort_type
+ */
+typedef enum {
+    /**
+     * Do not sort.
+     */
+    NOTMUCH_SORT_TYPE_NONE=0,
+    /**
+     * Ascending order
+     */
+    NOTMUCH_SORT_TYPE_ASCENDING,
+    /**
+     * Descending order
+     */
+    NOTMUCH_SORT_TYPE_DESCENDING,
+} notmuch_sort_type_t;
+
 /**
  * Return the query_string of this query. See notmuch_query_create.
  */
@@ -860,6 +900,32 @@ notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort);
 notmuch_sort_t
 notmuch_query_get_sort (const notmuch_query_t *query);
 
+/**
+ * Specify the sort key for this query.
+ */
+void
+notmuch_query_set_sort_key (notmuch_query_t *query, notmuch_sort_key_t key);
+
+/**
+ * Return the sort_key specified for this query. See
+ * notmuch_query_set_sort_key.
+ */
+notmuch_sort_key_t
+notmuch_query_get_sort_key (const notmuch_query_t *query);
+
+/**
+ * Specify the sort type for this query.
+ */
+void
+notmuch_query_set_sort_type (notmuch_query_t *query, notmuch_sort_type_t type);
+
+/**
+ * Return the sort_type specified for this query. See
+ * notmuch_query_set_sort_type.
+ */
+notmuch_sort_type_t
+notmuch_query_get_sort_type (const notmuch_query_t *query);
+
 /**
  * Add a tag that will be excluded from the query results by default.
  * This exclusion will be ignored if this tag appears explicitly in
diff --git a/lib/query.cc b/lib/query.cc
index d633fa3d..c3b228fb 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -27,6 +27,8 @@ struct _notmuch_query {
     notmuch_database_t *notmuch;
     const char *query_string;
     notmuch_sort_t sort;
+    notmuch_sort_key_t sort_key;
+    notmuch_sort_type_t sort_type;
     notmuch_string_list_t *exclude_terms;
     notmuch_exclude_t omit_excluded;
     bool parsed;
@@ -107,6 +109,9 @@ notmuch_query_create (notmuch_database_t *notmuch,
 
     query->sort = NOTMUCH_SORT_NEWEST_FIRST;
 
+    query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+    query->sort_type = NOTMUCH_SORT_TYPE_DESCENDING;
+
     query->exclude_terms = _notmuch_string_list_create (query);
 
     query->omit_excluded = NOTMUCH_EXCLUDE_TRUE;
@@ -168,7 +173,33 @@ notmuch_query_set_omit_excluded (notmuch_query_t *query,
 void
 notmuch_query_set_sort (notmuch_query_t *query, notmuch_sort_t sort)
 {
+    /* only for use by _get_sort */
     query->sort = sort;
+
+    /* translate to new API */
+    switch (sort) {
+    case NOTMUCH_SORT_UNSET:
+ /* this probably indicates an error, but it seems relatively
+ * harmless, and this code path is deprecated */
+ break;
+    case NOTMUCH_SORT_OLDEST_FIRST:
+ query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+ query->sort_type = NOTMUCH_SORT_TYPE_ASCENDING;
+ break;
+    case NOTMUCH_SORT_NEWEST_FIRST:
+ query->sort_key = NOTMUCH_SORT_KEY_TIMESTAMP;
+ query->sort_type = NOTMUCH_SORT_TYPE_DESCENDING;
+ break;
+    case NOTMUCH_SORT_MESSAGE_ID:
+ query->sort_key = NOTMUCH_SORT_KEY_MESSAGE_ID;
+ query->sort_type = NOTMUCH_SORT_TYPE_ASCENDING;
+ break;
+    case NOTMUCH_SORT_UNSORTED:
+ query->sort_key = NOTMUCH_SORT_KEY_NONE;
+ query->sort_type = NOTMUCH_SORT_TYPE_NONE;
+ break;
+    }
+
 }
 
 notmuch_sort_t
@@ -177,6 +208,32 @@ notmuch_query_get_sort (const notmuch_query_t *query)
     return query->sort;
 }
 
+void
+notmuch_query_set_sort_key (notmuch_query_t *query, notmuch_sort_key_t sort_key)
+{
+    query->sort = NOTMUCH_SORT_UNSET;
+    query->sort_key = sort_key;
+}
+
+notmuch_sort_key_t
+notmuch_query_get_sort_key (const notmuch_query_t *query)
+{
+    return query->sort_key;
+}
+
+void
+notmuch_query_set_sort_type (notmuch_query_t *query, notmuch_sort_type_t sort_type)
+{
+    query->sort = NOTMUCH_SORT_UNSET;
+    query->sort_type = sort_type;
+}
+
+notmuch_sort_type_t
+notmuch_query_get_sort_type (const notmuch_query_t *query)
+{
+    return query->sort_type;
+}
+
 notmuch_status_t
 notmuch_query_add_tag_exclude (notmuch_query_t *query, const char *tag)
 {
@@ -331,6 +388,7 @@ _notmuch_query_search_documents (notmuch_query_t *query,
     enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, false);
     break;
  case NOTMUCH_SORT_UNSORTED:
+ case NOTMUCH_SORT_UNSET:
     break;
  }
 
--
2.15.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] WIP: use new sort key/type

In reply to this post by David Bremner-2
---
 lib/query.cc | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/lib/query.cc b/lib/query.cc
index c3b228fb..0e529ca8 100644
--- a/lib/query.cc
+++ b/lib/query.cc
@@ -377,18 +377,18 @@ _notmuch_query_search_documents (notmuch_query_t *query,
 
  enquire.set_weighting_scheme (Xapian::BoolWeight());
 
- switch (query->sort) {
- case NOTMUCH_SORT_OLDEST_FIRST:
-    enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, false);
-    break;
- case NOTMUCH_SORT_NEWEST_FIRST:
-    enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, true);
+ bool sort_reverse = false;
+ if (query->sort_type == NOTMUCH_SORT_TYPE_DESCENDING)
+    sort_reverse = true;
+
+ switch (query->sort_key) {
+ case NOTMUCH_SORT_KEY_TIMESTAMP:
+    enquire.set_sort_by_value (NOTMUCH_VALUE_TIMESTAMP, sort_reverse);
     break;
- case NOTMUCH_SORT_MESSAGE_ID:
-    enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, false);
+ case NOTMUCH_SORT_KEY_MESSAGE_ID:
+    enquire.set_sort_by_value (NOTMUCH_VALUE_MESSAGE_ID, sort_reverse);
     break;
- case NOTMUCH_SORT_UNSORTED:
- case NOTMUCH_SORT_UNSET:
+ case NOTMUCH_SORT_KEY_NONE:
     break;
  }
 
--
2.15.0

_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
William Casarin William Casarin
Reply | Threaded
Open this post in threaded view
|

Re: Proposed New sort API

In reply to this post by David Bremner-2
David Bremner <[hidden email]> writes:

> I started looking at William's sorting patches [1], but the
> proliferation of sorting options bugged me a bit. I decided to sketch
> out a new more flexible API.
>
> In the new API, there is is a sort "key", currently mapped one-to-one
> to value slots, but potentially could do more sophisticated things
> like making keys up dynamically (e.g. preprocessing subjects).
>
> The second parameter is a sort "type". Currently this is just
> ascending or descending, but other potential options include
> sort_by_relevance_then_value

This is great, this is what I was thinking about as well. I'll try
refactoring my sorting branch on top of these changes.

--
https://jb55.com
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
William Casarin William Casarin
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] WIP: groundwork for new sorting API

In reply to this post by David Bremner-2
David Bremner <[hidden email]> writes:

> --- a/lib/notmuch.h
> +++ b/lib/notmuch.h
> @@ -773,6 +773,10 @@ notmuch_query_create (notmuch_database_t *database,
>   * Sort values for notmuch_query_set_sort.
>   */
>  typedef enum {
> +    /**
> +     * Value was not set
> +     */
> +    NOTMUCH_SORT_UNSET = -1,
>      /**
>       * Oldest first.
>       */
> @@ -791,6 +795,42 @@ typedef enum {
>      NOTMUCH_SORT_UNSORTED
>  } notmuch_sort_t;


I'm adding a few new keys (from, subject, etc). I don't want to
duplicate them in the old notmuch_sort_t enum, I assume to move to the
new API there should be something like this as well?


diff --git a/lib/notmuch.h b/lib/notmuch.h
index d26cc09b..81a9d72f 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -787,10 +787,14 @@ typedef enum {
     NOTMUCH_SORT_NEWEST_FIRST,
     /**
      * Sort by message-id.
      */
     NOTMUCH_SORT_MESSAGE_ID,
+    /**
+     * Sort by keys.
+     */
+    NOTMUCH_SORT_KEYS,
     /**
      * Do not sort.
      */
     NOTMUCH_SORT_UNSORTED
 } notmuch_sort_t;


I've been wanting to sort by multiple keys recently so I figured
generalizing this to multiple (key, order) pairs might be a good idea
now that we're rethinking the sort api.

so then if I wanted to add my current changes I could just add a few
more keys like so:


diff --git a/lib/notmuch.h b/lib/notmuch.h
index a035bdb2..d26cc09b 100644
--- a/lib/notmuch.h
+++ b/lib/notmuch.h
@@ -809,10 +809,18 @@ typedef enum {
     NOTMUCH_SORT_KEY_TIMESTAMP,
     /**
      * Sort by message-id.
      */
     NOTMUCH_SORT_KEY_MESSAGE_ID,
+    /**
+     * Sort by subject.
+     */
+    NOTMUCH_SORT_KEY_SUBJECT,
+    /**
+     * Sort by from.
+     */
+    NOTMUCH_SORT_KEY_FROM,
 } notmuch_sort_key_t;
 

Then we could eventually do something like:

  --sort date:desc,from:asc

Which would mean: sort by date, and then by from.

Perhaps it could do something sensible by default without order
qualifiers as well:

  --sort date,from

LMK what you think


Will
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
William Casarin William Casarin
Reply | Threaded
Open this post in threaded view
|

Re: Proposed New sort API

In reply to this post by David Bremner-2
David Bremner <[hidden email]> writes:

> I started looking at William's sorting patches [1], but the
> proliferation of sorting options bugged me a bit. I decided to sketch
> out a new more flexible API.
>
> In the new API, there is is a sort "key", currently mapped one-to-one
> to value slots, but potentially could do more sophisticated things
> like making keys up dynamically (e.g. preprocessing subjects).
>
> The second parameter is a sort "type". Currently this is just
> ascending or descending, but other potential options include
> sort_by_relevance_then_value


Another thought I had that I wanted to throw out there for
consideration. It would be nice to be able to sort by "popular" threads,
aka sort by the number of messages in each thread. Not sure if this is
an easy thing to do at the query level?


Cheers,
Will
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Re: Proposed New sort API

William Casarin <[hidden email]> writes:

> David Bremner <[hidden email]> writes:
>
>> I started looking at William's sorting patches [1], but the
>> proliferation of sorting options bugged me a bit. I decided to sketch
>> out a new more flexible API.
>>
>> In the new API, there is is a sort "key", currently mapped one-to-one
>> to value slots, but potentially could do more sophisticated things
>> like making keys up dynamically (e.g. preprocessing subjects).
>>
>> The second parameter is a sort "type". Currently this is just
>> ascending or descending, but other potential options include
>> sort_by_relevance_then_value
>
>
> Another thought I had that I wanted to throw out there for
> consideration. It would be nice to be able to sort by "popular" threads,
> aka sort by the number of messages in each thread. Not sure if this is
> an easy thing to do at the query level?

I don't see how to manage it as a xapian query. In the current database
schema xapian doesn't really know about threads, except that messages
know what thread they belong to.
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] WIP: groundwork for new sorting API

In reply to this post by William Casarin
William Casarin <[hidden email]> writes:

>
>
> I'm adding a few new keys (from, subject, etc). I don't want to
> duplicate them in the old notmuch_sort_t enum, I assume to move to the
> new API there should be something like this as well?

It's been a while since I was thinking about this, but I suspect my
intent was that new keys could be supported (only) by the new API.
We basically want to deprecate the old API as soon as the new one exists
to transition to.
>  
>
> Then we could eventually do something like:
>
>   --sort date:desc,from:asc
>
> Which would mean: sort by date, and then by from.

I think the "notmuch way" would be to spell out descending and
ascending, and rely on completion to make that less painful to
type. I only mention it because I was confused by what "desc" and "asc"
stood for. For some reason descriptor and ascii jumped to my
pre-caffeinated mind.

> Perhaps it could do something sensible by default without order
> qualifiers as well:
>
>   --sort date,from

I would hope so, yes.
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
William Casarin William Casarin
Reply | Threaded
Open this post in threaded view
|

Re: Proposed New sort API

In reply to this post by David Bremner-2
David Bremner <[hidden email]> writes:

> William Casarin <[hidden email]> writes:
>
>> Another thought I had that I wanted to throw out there for
>> consideration. It would be nice to be able to sort by "popular" threads,
>> aka sort by the number of messages in each thread. Not sure if this is
>> an easy thing to do at the query level?
>
> I don't see how to manage it as a xapian query. In the current database
> schema xapian doesn't really know about threads, except that messages
> know what thread they belong to.

I noticed notmuch-search shows the number of messages in the thread,
perhaps you could just sort on that value when returning the results?
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch
David Bremner-2 David Bremner-2
Reply | Threaded
Open this post in threaded view
|

Re: Proposed New sort API

William Casarin <[hidden email]> writes:

> David Bremner <[hidden email]> writes:
>
>> William Casarin <[hidden email]> writes:
>>
>>> Another thought I had that I wanted to throw out there for
>>> consideration. It would be nice to be able to sort by "popular" threads,
>>> aka sort by the number of messages in each thread. Not sure if this is
>>> an easy thing to do at the query level?
>>
>> I don't see how to manage it as a xapian query. In the current database
>> schema xapian doesn't really know about threads, except that messages
>> know what thread they belong to.
>
> I noticed notmuch-search shows the number of messages in the thread,
> perhaps you could just sort on that value when returning the results?

AFAIU, currently there is no explicit sorting of threads returned, any
sorting is a side effect of the sorting of the underlying
messages. Since the threads are generated lazily, for queries generating
a large number of threads such a post thread generation sorting would
generate a noticeable delay. On the other hand, if it was optional, I
guess that would be OK. It would introduce some complications and new
code paths orthogonal to the other stuff we've been talking about.

d
_______________________________________________
notmuch mailing list
[hidden email]
https://notmuchmail.org/mailman/listinfo/notmuch