thread grouping, more or less

nntp
magical 2022-08-10 03:24:07 +00:00
parent 2718e223c6
commit d2d599318f
1 changed files with 96 additions and 35 deletions

View File

@ -7,9 +7,12 @@ import nntplib
import time
import json
import ssl
import re
import os
__all__ = ('BBJNews','URLError')
class BBJNews(object):
# this module isn't exactly complete. The below description claims
# `all of its endpoints are mapped to native methods` though this
@ -506,35 +509,8 @@ class BBJNews(object):
# :bytes - the number of bytes in the article
# :lines - the number of lines in the body (deprecated)
if False:
# build up a map of message references
# we use a disjoint-set data structure
# to find the root of each message
threadmap = {}
rank = {}
for num, ov in overviews:
msgid = nntplib.decode_header(ov['message-id'])
# RFC5536 suggests that whitespace should not occur inside
# a message id, which (if true) makes it pretty easy to split
# the list of message ids in the references header
refs = nntplib.decode_header(ov['references']).split()
for r in refs:
threadmap[msgid] = r
rank[msgid] = 1
# TODO
else:
# make every message its own thread, for prototyping purposes
#t = {
# 'title': str,
# 'reply_count': int, # does this include the OP?
# 'pinned': bool,
# 'thread_id': uuid
# 'author': user_uuid,
# 'created': time,
# 'last_mod': time,
# 'last_author': user_uuid,
#}
threads = _overview_to_threads(overviews)
# see also: https://www.jwz.org/doc/threading.html
threads = _overviews_to_threads_fancy(overviews)
# make usermap
usermap = {}
@ -545,12 +521,13 @@ class BBJNews(object):
addr = _parse_single_address(userid)
usermap[userid] = {
'user_id': userid,
'user_name': addr.name,
'user_name': addr.name or addr.user,
'address': addr.address,
'color': colorhash(userid),
'is_admin': False, # TODO: LIST MODERATORS?
}
threads.sort(key=lambda x: x['last_mod'], reverse=True)
return threads, usermap
@ -565,7 +542,8 @@ class BBJNews(object):
print(usermap[author_id]["user_name"])
print(message["body"])
"""
return {}, {}
m = self.fake_message('oops...')
return {"title":"", "messages":[m], "author":m['author']}, {m['author']: self.user}
response = self("thread_load",
format=format, thread_id=thread_id, op_only=op_only)
@ -615,14 +593,14 @@ class BBJNews(object):
}
# unused
def format_message(self, body, format="sequential"):
"""
Send `body` to the server to be formatted according to `format`,
defaulting to the sequential parser. Returns the body object.
"""
# NOTE(review): this span reads like a diff hunk rendered without +/- markers:
# the two statements below look like the removed server round-trip, and the
# `return [[(None, body)]]` after them like its replacement. Read literally,
# the first `return` wins and everything after it is unreachable -- confirm
# against the committed file which version is live.
response = self("format_message", body=body, format=format)
return response["data"]
# Stand-in result: hands `body` back unformatted as a single (None, body)
# pair nested inside two lists; this line does not call `self(...)`.
return [[(None, body)]]
#response = self("format_message", body=body, format=format)
#return response["data"]
# unsupported
def message_delete(self, thread_id, post_id):
@ -747,8 +725,78 @@ class BBJNews(object):
"messages": response["data"]["messages"]
}
def _overviews_to_threads_fancy(overviews):
    """Group overview entries into threads by following References headers.

    Messages that reference one another, directly or transitively, are
    merged with a disjoint-set (union-find) structure keyed by message id.
    Each resulting group yields one thread dict.

    overviews -- iterable of (num, overview) pairs; `overview` is read for
                 the 'message-id', 'references', 'date', 'subject' and
                 'from' headers. `num` is only used as the sort key inside
                 a thread (presumably the article number).

    Returns an unsorted list of thread dicts with the keys 'pinned',
    'title', 'reply_count', 'thread_id', 'author', 'created',
    'last_author' and 'last_mod'. Entries whose headers cannot be parsed
    are skipped rather than aborting the whole listing.
    """
    # Maps a message id to its parent in the disjoint-set forest; a root
    # maps to itself.
    parent_of = {}

    def find(msgid):
        # Iterative on purpose: a long chain of parents must not hit the
        # interpreter's recursion limit.
        root = msgid
        while True:
            up = parent_of.setdefault(root, root)
            if up == root:
                break
            root = up
        # Path compression: point every node on the walked path at the root.
        while msgid != root:
            following = parent_of[msgid]
            parent_of[msgid] = root
            msgid = following
        return root

    def timestamp(overview):
        parsed = email.utils.parsedate_tz(nntplib.decode_header(overview['date']))
        if parsed is None:
            # parsedate_tz reports an unparseable date by returning None;
            # passing that on to mktime_tz would raise TypeError and escape
            # the handler below, so turn it into the ValueError we skip on.
            raise ValueError("unparseable date header")
        return email.utils.mktime_tz(parsed)

    by_id = {}
    for num, ov in overviews:
        try:
            msgid = nntplib.decode_header(ov['message-id']).strip()
            refs = _parse_message_ids(nntplib.decode_header(ov['references']))
        except ValueError:
            continue
        by_id[msgid] = (num, msgid, ov)
        for ref in refs:
            # Union: hang this message's tree under the referenced one.
            parent_of[find(msgid)] = find(ref)

    # Bucket the messages we actually have by the root of their set. The
    # root itself may be an id that was only ever referenced.
    grouped = {}
    for msgid, entry in by_id.items():
        grouped.setdefault(find(msgid), []).append(entry)

    threads = []
    for members in grouped.values():
        members.sort(key=lambda entry: entry[0])
        first = members[0][2]
        last = members[-1][2]
        try:
            created = timestamp(first)
            last_mod = timestamp(last)
            thread = {
                'pinned': False,
                'title': nntplib.decode_header(first['subject']),
                # counts every message in the group, the first one included
                'reply_count': len(members),
                'thread_id': nntplib.decode_header(first['message-id']),
                'author': nntplib.decode_header(first['from']),
                'created': created,
                'last_author': nntplib.decode_header(last['from']),
                'last_mod': last_mod,
            }
        except (ValueError, KeyError, IndexError):
            continue
        threads.append(thread)
    return threads
def _overview_to_threads(overviews):
# make every message its own thread, for prototyping purposes
#t = {
# 'title': str,
# 'reply_count': int, # does this include the OP?
# 'pinned': bool,
# 'thread_id': uuid
# 'author': user_uuid,
# 'created': time,
# 'last_mod': time,
# 'last_author': user_uuid,
#}
threads = []
for num, ov in overviews:
try:
@ -782,7 +830,20 @@ def _test_overview_to_threads():
print(t)
Address = namedtuple('Address', 'name, address')
_atext = r"[a-zA-Z0-9!#$%&'\*\+\-/=?^_`{|}~]" # RFC 5322 §3.2.3
_dotatext = r"%s+(?:\.%s+)*" % (_atext, _atext)
_mdtext = r"\[[!-=\?-Z^-~]\]"
_msg_id_re = re.compile(r'<%s@(?:%s|%s)>' % (_dotatext, _dotatext, _mdtext)) # RFC 5536 §3.1.3
def _parse_message_ids(s):
"""parses a list of message ids separated by junk"""
return _msg_id_re.findall(s)
class Address(namedtuple('Address', 'name, address')):
    """A (name, address) pair with a shortcut to the address's local part."""

    @property
    def user(self):
        """Everything in `address` before the first "@" (all of it if none)."""
        return self.address.partition("@")[0]
def _parse_single_address(value):
# the email.headerregistry api is truly bizarre