thread grouping, more or less
parent
2718e223c6
commit
d2d599318f
|
@ -7,9 +7,12 @@ import nntplib
|
|||
import time
|
||||
import json
|
||||
import ssl
|
||||
import re
|
||||
|
||||
import os
|
||||
|
||||
__all__ = ('BBJNews','URLError')
|
||||
|
||||
class BBJNews(object):
|
||||
# this module isn't exactly complete. The below description claims
|
||||
# `all of its endpoints are mapped to native methods` though this
|
||||
|
@ -506,35 +509,8 @@ class BBJNews(object):
|
|||
# :bytes - the number of bytes in the article
|
||||
# :lines - the number of lines in the body (deprecated)
|
||||
|
||||
if False:
|
||||
# build up a map of message references
|
||||
# we use a disjoint-set data structure
|
||||
# to find the root of each message
|
||||
threadmap = {}
|
||||
rank = {}
|
||||
for num, ov in overviews:
|
||||
msgid = nntplib.decode_header(ov['message-id'])
|
||||
# RFC5536 suggests that whitespace should not occur inside
|
||||
# a message id, which (if true) makes it pretty easy to split
|
||||
# the list of message ids in the references header
|
||||
refs = nntplib.decode_header(ov['references']).split()
|
||||
for r in refs:
|
||||
threadmap[msgid] = r
|
||||
rank[msgid] = 1
|
||||
# TODO
|
||||
else:
|
||||
# make every message its own thread, for prototyping purposes
|
||||
#t = {
|
||||
# 'title': str,
|
||||
# 'reply_count': int, # does this include the OP?
|
||||
# 'pinned': bool,
|
||||
# 'thread_id': uuid
|
||||
# 'author': user_uuid,
|
||||
# 'created': time,
|
||||
# 'last_mod': time,
|
||||
# 'last_author': user_uuid,
|
||||
#}
|
||||
threads = _overview_to_threads(overviews)
|
||||
# see also: https://www.jwz.org/doc/threading.html
|
||||
threads = _overviews_to_threads_fancy(overviews)
|
||||
|
||||
# make usermap
|
||||
usermap = {}
|
||||
|
@ -545,12 +521,13 @@ class BBJNews(object):
|
|||
addr = _parse_single_address(userid)
|
||||
usermap[userid] = {
|
||||
'user_id': userid,
|
||||
'user_name': addr.name,
|
||||
'user_name': addr.name or addr.user,
|
||||
'address': addr.address,
|
||||
'color': colorhash(userid),
|
||||
'is_admin': False, # TODO: LIST MODERATORS?
|
||||
}
|
||||
|
||||
threads.sort(key=lambda x: x['last_mod'], reverse=True)
|
||||
return threads, usermap
|
||||
|
||||
|
||||
|
@ -565,7 +542,8 @@ class BBJNews(object):
|
|||
print(usermap[author_id]["user_name"])
|
||||
print(message["body"])
|
||||
"""
|
||||
return {}, {}
|
||||
m = self.fake_message('oops...')
|
||||
return {"title":"", "messages":[m], "author":m['author']}, {m['author']: self.user}
|
||||
|
||||
response = self("thread_load",
|
||||
format=format, thread_id=thread_id, op_only=op_only)
|
||||
|
@ -615,14 +593,14 @@ class BBJNews(object):
|
|||
}
|
||||
|
||||
|
||||
# unused
|
||||
def format_message(self, body, format="sequential"):
    """
    Return `body` wrapped as `[[(None, body)]]` without contacting the
    server.

    The BBJ `format_message` endpoint is not used here: the former
    implementation, `self("format_message", body=body, format=format)`
    returning `response["data"]`, is disabled. `format` is still accepted
    so existing callers keep working, but it is ignored.
    """
    # NOTE(review): the nested list/tuple shape presumably mirrors what the
    # server-side formatter used to return -- confirm against the renderer.
    return [[(None, body)]]
|
||||
|
||||
# unsupported
|
||||
def message_delete(self, thread_id, post_id):
|
||||
|
@ -747,8 +725,78 @@ class BBJNews(object):
|
|||
"messages": response["data"]["messages"]
|
||||
}
|
||||
|
||||
def _overviews_to_threads_fancy(overviews):
    """Group overview entries into threads by following References headers.

    `overviews` is an iterable of `(article_number, overview)` pairs, where
    each overview is indexable by lower-case header name ('message-id',
    'references', 'subject', 'from', 'date').

    Messages are joined into one thread whenever one of them references the
    other, directly or through a chain of references; referenced ids do not
    need to be present in `overviews` themselves.

    Returns a list of thread dicts with the keys 'pinned', 'title',
    'reply_count', 'thread_id', 'author', 'created', 'last_author' and
    'last_mod'. Within a thread, "first" and "last" mean the lowest and
    highest article number; 'reply_count' counts every message in the
    thread, the first one included. Entries with missing headers and
    threads with an unparseable Date header are skipped rather than raising.

    see also: https://www.jwz.org/doc/threading.html
    """
    # Disjoint-set forest: message id -> parent id. A root maps to itself.
    parent_of = {}

    def find(msgid):
        # Iterative on purpose: a long reference chain must not be able to
        # exhaust the interpreter's recursion limit.
        root = msgid
        while True:
            parent = parent_of.setdefault(root, root)
            if parent == root:
                break
            root = parent
        # Path compression: point every node visited straight at the root.
        while msgid != root:
            next_id = parent_of[msgid]
            parent_of[msgid] = root
            msgid = next_id
        return root

    def timestamp(ov):
        # parsedate_tz() signals failure by returning None, which mktime_tz()
        # would turn into a TypeError; raise ValueError so the caller's
        # handler treats it like any other malformed header.
        parsed = email.utils.parsedate_tz(nntplib.decode_header(ov['date']))
        if parsed is None:
            raise ValueError('unparseable date header')
        return email.utils.mktime_tz(parsed)

    by_id = {}
    for num, ov in overviews:
        try:
            msgid = nntplib.decode_header(ov['message-id']).strip()
            refs = _parse_message_ids(nntplib.decode_header(ov['references']))
        except (KeyError, ValueError):
            # Malformed overview entry: leave it out of the listing.
            continue

        by_id[msgid] = (num, msgid, ov)
        for ref in refs:
            # Union the set containing this message with the set containing
            # the message it references.
            parent_of[find(msgid)] = find(ref)

    # Bucket the messages we actually have by the root of their set.
    grouped = {}
    for msgid, entry in by_id.items():
        grouped.setdefault(find(msgid), []).append(entry)

    threads = []
    for entries in grouped.values():
        entries.sort(key=lambda entry: entry[0])  # by article number
        first = entries[0][2]
        last = entries[-1][2]
        try:
            thread = {
                'pinned': False,
                'title': nntplib.decode_header(first['subject']),
                'reply_count': len(entries),
                'thread_id': nntplib.decode_header(first['message-id']),
                'author': nntplib.decode_header(first['from']),
                'created': timestamp(first),
                'last_author': nntplib.decode_header(last['from']),
                'last_mod': timestamp(last),
            }
        except (ValueError, KeyError, IndexError, OverflowError):
            continue
        threads.append(thread)

    return threads
|
||||
|
||||
|
||||
def _overview_to_threads(overviews):
|
||||
# make every message its own thread, for prototyping purposes
|
||||
#t = {
|
||||
# 'title': str,
|
||||
# 'reply_count': int, # does this include the OP?
|
||||
# 'pinned': bool,
|
||||
# 'thread_id': uuid
|
||||
# 'author': user_uuid,
|
||||
# 'created': time,
|
||||
# 'last_mod': time,
|
||||
# 'last_author': user_uuid,
|
||||
#}
|
||||
threads = []
|
||||
for num, ov in overviews:
|
||||
try:
|
||||
|
@ -782,7 +830,20 @@ def _test_overview_to_threads():
|
|||
print(t)
|
||||
|
||||
|
||||
Address = namedtuple('Address', 'name, address')
|
||||
_atext = r"[a-zA-Z0-9!#$%&'\*\+\-/=?^_`{|}~]" # RFC 5322 §3.2.3
|
||||
_dotatext = r"%s+(?:\.%s+)*" % (_atext, _atext)
|
||||
_mdtext = r"\[[!-=\?-Z^-~]\]"
|
||||
_msg_id_re = re.compile(r'<%s@(?:%s|%s)>' % (_dotatext, _dotatext, _mdtext)) # RFC 5536 §3.1.3
|
||||
|
||||
def _parse_message_ids(s):
|
||||
"""parses a list of message ids separated by junk"""
|
||||
return _msg_id_re.findall(s)
|
||||
|
||||
class Address(namedtuple('Address', 'name, address')):
    """A parsed mailbox: a display `name` and an `address` string."""

    # Keep instances as light as plain tuples (no per-instance __dict__).
    __slots__ = ()

    @property
    def user(self):
        """Return the part of `address` before the first "@".

        An address that contains no "@" is returned unchanged.
        """
        return self.address.partition("@")[0]
|
||||
|
||||
def _parse_single_address(value):
|
||||
# the email.headerregistry api is truly bizarre
|
||||
|
|
Loading…
Reference in New Issue