thread grouping, more or less

2022-08-10 03:24:07 +00:00 · 2022-08-10 03:24:07 +00:00 · d2d599318f
commit d2d599318f
parent 2718e223c6
1 changed files with 96 additions and 35 deletions
--- a/clients/nntp_client.py
+++ b/clients/nntp_client.py
@@ -7,9 +7,12 @@ import nntplib
 import time
 import json
 import ssl
+import re

 import os

+__all__ = ('BBJNews','URLError')
+
 class BBJNews(object):
    # this module isnt exactly complete. The below description claims
    # `all of its endpoints are mapped to native methods` though this
@@ -506,35 +509,8 @@ class BBJNews(object):
        #       :bytes - the number of bytes in the article
        #       :lines - the number of lines in the body (deprecated)

-        if False:
-            # build up a map of message references
-            # we use a disjoint-set data structure
-            # to find the root of each message
-            threadmap = {}
-            rank = {}
-            for num, ov in overviews:
-                msgid = nntplib.decode_header(ov['message-id'])
-                # RFC5536 suggests that whitespace should not occur inside
-                # a message id, which (if true) makes it pretty easy to split
-                # the list of message ids in the references header
-                refs = nntplib.decode_header(ov['references']).split()
-                for r in refs:
-                    threadmap[msgid] = r
-                    rank[msgid] = 1
-                    # TODO
-        else:
-            # make every message its own thread, for prototyping purposes
-            #t = {
-            #    'title': str,
-            #    'reply_count': int, # does this include the OP?
-            #    'pinned': bool,
-            #    'thread_id': uuid
-            #    'author': user_uuid,
-            #    'created': time,
-            #    'last_mod': time,
-            #    'last_author': user_uuid,
-            #}
-            threads = _overview_to_threads(overviews)
+        # see also: https://www.jwz.org/doc/threading.html
+        threads = _overviews_to_threads_fancy(overviews)

        # make usermap
        usermap = {}
@@ -545,12 +521,13 @@ class BBJNews(object):
            addr = _parse_single_address(userid)
            usermap[userid] = {
                'user_id': userid,
-                'user_name': addr.name,
+                'user_name': addr.name or addr.user,
                'address': addr.address,
                'color': colorhash(userid),
                'is_admin': False, # TODO: LIST MODERATORS?
            }

+        threads.sort(key=lambda x: x['last_mod'], reverse=True)
        return threads, usermap


@@ -565,7 +542,8 @@ class BBJNews(object):
              print(usermap[author_id]["user_name"])
              print(message["body"])
        """
-        return {}, {}
+        m = self.fake_message('oops...')
+        return {"title":"", "messages":[m], "author":m['author']}, {m['author']: self.user}

        response = self("thread_load",
            format=format, thread_id=thread_id, op_only=op_only)
@@ -615,14 +593,14 @@ class BBJNews(object):
        }


-    # unused
    def format_message(self, body, format="sequential"):
        """
        Send `body` to the server to be formatted according to `format`,
        defaulting to the sequential parser. Returns the body object.
        """
-        response = self("format_message", body=body, format=format)
-        return response["data"]
+        return [[(None, body)]]
+        #response = self("format_message", body=body, format=format)
+        #return response["data"]

    # unsupported
    def message_delete(self, thread_id, post_id):
@@ -747,8 +725,78 @@ class BBJNews(object):
            "messages": response["data"]["messages"]
        }

+def _overviews_to_threads_fancy(overviews):
+    """
+    Group overview entries into threads by following their References.
+
+    `overviews` is an iterable of (article number, overview) pairs; each
+    overview is indexed by header name ('message-id', 'references',
+    'subject', 'from', 'date') and its values are run through
+    nntplib.decode_header.
+
+    Two messages land in the same thread when a chain of references
+    connects them. Within a thread the message with the lowest article
+    number supplies 'title', 'thread_id', 'author' and 'created'; the one
+    with the highest supplies 'last_author' and 'last_mod'. 'reply_count'
+    counts every message in the thread, the first one included.
+
+    A message whose id or references cannot be read is skipped, and a
+    thread whose first or last message has unusable subject, author or
+    date headers is dropped. Returns a list of thread dicts.
+    """
+    # disjoint-set forest: maps a message id to its parent; a root maps
+    # to itself. every value stored here is also a key.
+    parents = {}
+
+    def find(msgid):
+        # climb to the representative of the set containing msgid
+        root = parents.setdefault(msgid, msgid)
+        while parents[root] != root:
+            root = parents[root]
+        # path compression: point everything we passed straight at the
+        # root. done iteratively so a long chain cannot blow the stack.
+        while msgid != root:
+            parent = parents[msgid]
+            parents[msgid] = root
+            msgid = parent
+        return root
+
+    by_id = {}
+    for num, ov in overviews:
+        try:
+            msgid = nntplib.decode_header(ov['message-id']).strip()
+            refs = _parse_message_ids(nntplib.decode_header(ov['references']))
+        except (KeyError, ValueError):
+            continue
+
+        by_id[msgid] = (num, msgid, ov)
+        # merge this message's set with the set of everything it references
+        for r in refs:
+            parents[find(msgid)] = find(r)
+
+    # bucket the messages we actually have by the root of their set
+    grouped = {}
+    for msgid, entry in by_id.items():
+        grouped.setdefault(find(msgid), []).append(entry)
+
+    threads = []
+    for members in grouped.values():
+        # article number order stands in for chronological order
+        members.sort(key=lambda m: m[0])
+        first = members[0][2]
+        last = members[-1][2]
+        try:
+            created = nntplib.decode_header(first['date'])
+            created = email.utils.mktime_tz(email.utils.parsedate_tz(created))
+            last_mod = nntplib.decode_header(last['date'])
+            last_mod = email.utils.mktime_tz(email.utils.parsedate_tz(last_mod))
+            t = {
+                'pinned': False,
+                'title': nntplib.decode_header(first['subject']),
+                'reply_count': len(members),
+                'thread_id': nntplib.decode_header(first['message-id']),
+                'author': nntplib.decode_header(first['from']),
+                'created': created,
+                'last_author': nntplib.decode_header(last['from']),
+                'last_mod': last_mod,
+            }
+        except (ValueError, KeyError, IndexError, TypeError):
+            # TypeError: parsedate_tz returns None for a date it cannot
+            # parse, and mktime_tz(None) then fails with TypeError
+            continue
+        else:
+            threads.append(t)
+
+    return threads
+

 def _overview_to_threads(overviews):
+    # make every message its own thread, for prototyping purposes
+    #t = {
+    #    'title': str,
+    #    'reply_count': int, # does this include the OP?
+    #    'pinned': bool,
+    #    'thread_id': uuid
+    #    'author': user_uuid,
+    #    'created': time,
+    #    'last_mod': time,
+    #    'last_author': user_uuid,
+    #}
    threads = []
    for num, ov in overviews:
        try:
@@ -782,7 +830,20 @@ def _test_overview_to_threads():
        print(t)


-Address = namedtuple('Address', 'name, address')
+# Building blocks of the message-id grammar, used to pick ids out of a
+# References header.
+_atext = r"[a-zA-Z0-9!#$%&'\*\+\-/=?^_`{|}~]" # RFC 5322 §3.2.3
+_dotatext = r"%s+(?:\.%s+)*" % (_atext, _atext)
+# no-fold-literal is "[" *mdtext "]": any number of mdtext characters
+# between the brackets, not exactly one
+_mdtext = r"\[[!-=\?-Z^-~]*\]"
+_msg_id_re = re.compile(r'<%s@(?:%s|%s)>' % (_dotatext, _dotatext, _mdtext)) # RFC 5536 §3.1.3
+
+def _parse_message_ids(s):
+    """
+    Return every message id found in `s`, in order of appearance.
+
+    Each id is returned with its angle brackets. Anything between the
+    ids is treated as junk and ignored.
+    """
+    return _msg_id_re.findall(s)
+
+class Address(namedtuple('Address', 'name, address')):
+    """A (name, address) pair with a shortcut for the mailbox's local part."""
+
+    @property
+    def user(self):
+        """Everything in `address` before the first "@" (all of it if none)."""
+        local_part = self.address.partition("@")[0]
+        return local_part

 def _parse_single_address(value):
    # the email.headerregistry api is truly bizarre