From d2d599318fcdec2ac36d57b07c831d7d0d6790df Mon Sep 17 00:00:00 2001
From: magical
Date: Wed, 10 Aug 2022 03:24:07 +0000
Subject: [PATCH] thread grouping, more or less

---
Review notes (free-form text in this position is ignored by `git am`):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch. Hunk line counts and the diffstat were re-checked
   against the reconstruction; exact leading whitespace on context/removed
   lines should be confirmed against the target file before applying.
 * _mdtext: added the missing `*` quantifier. RFC 5536 section 3.1.3 defines
   msg-id-domain-literal as "[" *mdtext "]"; without the quantifier only
   one-character literals matched, so ids such as <x@[127.0.0.1]> were
   dropped from References and their threads were split.
 * _overviews_to_threads_fancy: the first try block now also skips overview
   entries that lack a 'message-id' or 'references' key (KeyError), the same
   way the second try block in that function already does.
 * Because two added lines changed, the post-image blob id on the `index`
   line no longer describes the result exactly.

 clients/nntp_client.py | 131 ++++++++++++++++++++++++++++++-----------
 1 file changed, 96 insertions(+), 35 deletions(-)

diff --git a/clients/nntp_client.py b/clients/nntp_client.py
index aeb6845..7e8e54c 100644
--- a/clients/nntp_client.py
+++ b/clients/nntp_client.py
@@ -7,9 +7,12 @@ import nntplib
 import time
 import json
 import ssl
+import re
 import os
 
+__all__ = ('BBJNews','URLError')
+
 
 class BBJNews(object):
     # this module isnt exactly complete. The below description claims
     # `all of its endpoints are mapped to native methods` though this
@@ -506,35 +509,8 @@ class BBJNews(object):
         # :bytes - the number of bytes in the article
         # :lines - the number of lines in the body (deprecated)
 
-        if False:
-            # build up a map of message references
-            # we use a disjoint-set data structure
-            # to find the root of each message
-            threadmap = {}
-            rank = {}
-            for num, ov in overviews:
-                msgid = nntplib.decode_header(ov['message-id'])
-                # RFC5536 suggests that whitespace should not occur inside
-                # a message id, which (if true) makes it pretty easy to split
-                # the list of message ids in the references header
-                refs = nntplib.decode_header(ov['references']).split()
-                for r in refs:
-                    threadmap[msgid] = r
-                    rank[msgid] = 1
-            # TODO
-        else:
-            # make every message its own thread, for prototyping purposes
-            #t = {
-            #    'title': str,
-            #    'reply_count': int, # does this include the OP?
-            #    'pinned': bool,
-            #    'thread_id': uuid
-            #    'author': user_uuid,
-            #    'created': time,
-            #    'last_mod': time,
-            #    'last_author': user_uuid,
-            #}
-            threads = _overview_to_threads(overviews)
+        # see also: https://www.jwz.org/doc/threading.html
+        threads = _overviews_to_threads_fancy(overviews)
 
         # make usermap
         usermap = {}
@@ -545,12 +521,13 @@ class BBJNews(object):
             addr = _parse_single_address(userid)
             usermap[userid] = {
                 'user_id': userid,
-                'user_name': addr.name,
+                'user_name': addr.name or addr.user,
                 'address': addr.address,
                 'color': colorhash(userid),
                 'is_admin': False, # TODO: LIST MODERATORS?
             }
 
+        threads.sort(key=lambda x: x['last_mod'], reverse=True)
         return threads, usermap
 
 
@@ -565,7 +542,8 @@ class BBJNews(object):
             print(usermap[author_id]["user_name"])
             print(message["body"])
         """
-        return {}, {}
+        m = self.fake_message('oops...')
+        return {"title":"", "messages":[m], "author":m['author']}, {m['author']: self.user}
 
         response = self("thread_load",
             format=format, thread_id=thread_id, op_only=op_only)
@@ -615,14 +593,14 @@ class BBJNews(object):
         }
 
 
-    # unused
     def format_message(self, body, format="sequential"):
         """
         Send `body` to the server to be formatted according to `format`,
         defaulting to the sequential parser. Returns the body object.
         """
-        response = self("format_message", body=body, format=format)
-        return response["data"]
+        return [[(None, body)]]
+        #response = self("format_message", body=body, format=format)
+        #return response["data"]
 
     # unsupported
     def message_delete(self, thread_id, post_id):
@@ -747,8 +725,78 @@ class BBJNews(object):
             "messages": response["data"]["messages"]
         }
 
+def _overviews_to_threads_fancy(overviews):
+    # build up a map of message references
+    # we use a disjoint-set data structure
+    # to find the root of each message
+    threadmap = {}
+    def find(id):
+        parent = threadmap.setdefault(id, id)
+        if parent == id:
+            return id
+        root = find(parent)
+        if root != parent:
+            threadmap[id] = root
+        return root
+
+    messages = {}
+    for num, ov in overviews:
+        try:
+            msgid = nntplib.decode_header(ov['message-id']).strip()
+            refs = _parse_message_ids(nntplib.decode_header(ov['references']))
+        except (ValueError, KeyError):
+            continue
+
+        messages[msgid] = (num, msgid, ov)
+        for r in refs:
+            threadmap[find(msgid)] = find(r)
+
+    thread_messages = {}
+    for id in messages:
+        root = find(id)
+        l = thread_messages.setdefault(root, [])
+        l.append(messages[id])
+
+    threads = []
+    for id, messages in thread_messages.items():
+        messages.sort(key=lambda x: x[0])
+        first = messages[0][2]
+        last = messages[-1][2]
+        try:
+            d = nntplib.decode_header(first['date'])
+            d = email.utils.mktime_tz(email.utils.parsedate_tz(d))
+            d2 = nntplib.decode_header(last['date'])
+            d2 = email.utils.mktime_tz(email.utils.parsedate_tz(d2))
+            t = {
+                'pinned': False,
+                'title': nntplib.decode_header(first['subject']),
+                'reply_count': len(messages),
+                'thread_id': nntplib.decode_header(first['message-id']),
+                'author': nntplib.decode_header(first['from']),
+                'created': d,
+                'last_author': nntplib.decode_header(last['from']),
+                'last_mod': d2,
+            }
+        except (ValueError, KeyError, IndexError):
+            continue
+        else:
+            threads.append(t)
+
+    return threads
+
 
 def _overview_to_threads(overviews):
+    # make every message its own thread, for prototyping purposes
+    #t = {
+    #    'title': str,
+    #    'reply_count': int, # does this include the OP?
+    #    'pinned': bool,
+    #    'thread_id': uuid
+    #    'author': user_uuid,
+    #    'created': time,
+    #    'last_mod': time,
+    #    'last_author': user_uuid,
+    #}
     threads = []
     for num, ov in overviews:
         try:
@@ -782,7 +830,20 @@ def _test_overview_to_threads():
         print(t)
 
 
-Address = namedtuple('Address', 'name, address')
+_atext = r"[a-zA-Z0-9!#$%&'\*\+\-/=?^_`{|}~]" # RFC 5322 §3.2.3
+_dotatext = r"%s+(?:\.%s+)*" % (_atext, _atext)
+_mdtext = r"\[[!-=\?-Z^-~]*\]" # "[" *mdtext "]": zero or more chars
+_msg_id_re = re.compile(r'<%s@(?:%s|%s)>' % (_dotatext, _dotatext, _mdtext)) # RFC 5536 §3.1.3
+
+def _parse_message_ids(s):
+    """parses a list of message ids separated by junk"""
+    return _msg_id_re.findall(s)
+
+class Address(namedtuple('Address', 'name, address')):
+    @property
+    def user(self):
+        user, _, _ = self.address.partition("@")
+        return user
 
 def _parse_single_address(value):
     # the email.headerregistry api is truly bizarre