thread grouping, more or less
parent
2718e223c6
commit
d2d599318f
|
@ -7,9 +7,12 @@ import nntplib
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
import ssl
|
import ssl
|
||||||
|
import re
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
__all__ = ('BBJNews','URLError')
|
||||||
|
|
||||||
class BBJNews(object):
|
class BBJNews(object):
|
||||||
# this module isnt exactly complete. The below description claims
|
# this module isnt exactly complete. The below description claims
|
||||||
# `all of its endpoints are mapped to native methods` though this
|
# `all of its endpoints are mapped to native methods` though this
|
||||||
|
@ -506,35 +509,8 @@ class BBJNews(object):
|
||||||
# :bytes - the number of bytes in the article
|
# :bytes - the number of bytes in the article
|
||||||
# :lines - the number of lines in the body (deprecated)
|
# :lines - the number of lines in the body (deprecated)
|
||||||
|
|
||||||
if False:
|
# see also: https://www.jwz.org/doc/threading.html
|
||||||
# build up a map of message references
|
threads = _overviews_to_threads_fancy(overviews)
|
||||||
# we use a disjoint-set data structure
|
|
||||||
# to find the root of each message
|
|
||||||
threadmap = {}
|
|
||||||
rank = {}
|
|
||||||
for num, ov in overviews:
|
|
||||||
msgid = nntplib.decode_header(ov['message-id'])
|
|
||||||
# RFC5536 suggests that whitespace should not occur inside
|
|
||||||
# a message id, which (if true) makes it pretty easy to split
|
|
||||||
# the list of message ids in the references header
|
|
||||||
refs = nntplib.decode_header(ov['references']).split()
|
|
||||||
for r in refs:
|
|
||||||
threadmap[msgid] = r
|
|
||||||
rank[msgid] = 1
|
|
||||||
# TODO
|
|
||||||
else:
|
|
||||||
# make every message its own thread, for prototyping purposes
|
|
||||||
#t = {
|
|
||||||
# 'title': str,
|
|
||||||
# 'reply_count': int, # does this include the OP?
|
|
||||||
# 'pinned': bool,
|
|
||||||
# 'thread_id': uuid
|
|
||||||
# 'author': user_uuid,
|
|
||||||
# 'created': time,
|
|
||||||
# 'last_mod': time,
|
|
||||||
# 'last_author': user_uuid,
|
|
||||||
#}
|
|
||||||
threads = _overview_to_threads(overviews)
|
|
||||||
|
|
||||||
# make usermap
|
# make usermap
|
||||||
usermap = {}
|
usermap = {}
|
||||||
|
@ -545,12 +521,13 @@ class BBJNews(object):
|
||||||
addr = _parse_single_address(userid)
|
addr = _parse_single_address(userid)
|
||||||
usermap[userid] = {
|
usermap[userid] = {
|
||||||
'user_id': userid,
|
'user_id': userid,
|
||||||
'user_name': addr.name,
|
'user_name': addr.name or addr.user,
|
||||||
'address': addr.address,
|
'address': addr.address,
|
||||||
'color': colorhash(userid),
|
'color': colorhash(userid),
|
||||||
'is_admin': False, # TODO: LIST MODERATORS?
|
'is_admin': False, # TODO: LIST MODERATORS?
|
||||||
}
|
}
|
||||||
|
|
||||||
|
threads.sort(key=lambda x: x['last_mod'], reverse=True)
|
||||||
return threads, usermap
|
return threads, usermap
|
||||||
|
|
||||||
|
|
||||||
|
@ -565,7 +542,8 @@ class BBJNews(object):
|
||||||
print(usermap[author_id]["user_name"])
|
print(usermap[author_id]["user_name"])
|
||||||
print(message["body"])
|
print(message["body"])
|
||||||
"""
|
"""
|
||||||
return {}, {}
|
m = self.fake_message('oops...')
|
||||||
|
return {"title":"", "messages":[m], "author":m['author']}, {m['author']: self.user}
|
||||||
|
|
||||||
response = self("thread_load",
|
response = self("thread_load",
|
||||||
format=format, thread_id=thread_id, op_only=op_only)
|
format=format, thread_id=thread_id, op_only=op_only)
|
||||||
|
@ -615,14 +593,14 @@ class BBJNews(object):
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# unused
|
|
||||||
def format_message(self, body, format="sequential"):
|
def format_message(self, body, format="sequential"):
|
||||||
"""
|
"""
|
||||||
Send `body` to the server to be formatted according to `format`,
|
Send `body` to the server to be formatted according to `format`,
|
||||||
defaulting to the sequential parser. Returns the body object.
|
defaulting to the sequential parser. Returns the body object.
|
||||||
"""
|
"""
|
||||||
response = self("format_message", body=body, format=format)
|
return [[(None, body)]]
|
||||||
return response["data"]
|
#response = self("format_message", body=body, format=format)
|
||||||
|
#return response["data"]
|
||||||
|
|
||||||
# unsupported
|
# unsupported
|
||||||
def message_delete(self, thread_id, post_id):
|
def message_delete(self, thread_id, post_id):
|
||||||
|
@ -747,8 +725,78 @@ class BBJNews(object):
|
||||||
"messages": response["data"]["messages"]
|
"messages": response["data"]["messages"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _overviews_to_threads_fancy(overviews):
    """Group overview entries into threads by following References headers.

    `overviews` is an iterable of `(article_number, overview_dict)` pairs;
    each overview dict is read for its 'message-id', 'references', 'date',
    'subject' and 'from' entries.

    Messages are merged with a disjoint-set (union-find) structure: every
    message is unioned with each message id listed in its References header,
    so all messages that share a root end up in the same thread.

    Returns a list of thread dicts with the keys 'pinned', 'title',
    'reply_count', 'thread_id', 'author', 'created', 'last_author' and
    'last_mod'.  Within a thread, the message with the lowest article number
    supplies title/author/created and the one with the highest supplies
    last_author/last_mod.  Threads whose headers cannot be decoded or whose
    dates cannot be parsed are skipped rather than aborting the listing.
    """
    # Disjoint-set forest: message id -> parent id; a root maps to itself.
    parent_of = {}

    def find(msgid):
        # Iterative lookup with path compression; avoids hitting the
        # recursion limit on very long reference chains.
        visited = []
        node = msgid
        while True:
            parent = parent_of.setdefault(node, node)
            if parent == node:
                break
            visited.append(node)
            node = parent
        for seen in visited:
            parent_of[seen] = node
        return node

    def timestamp(ov):
        # parsedate_tz() returns None (it does not raise) for a date it
        # cannot parse; turn that into a ValueError so the caller's
        # except clause skips the thread instead of crashing in mktime_tz.
        raw = nntplib.decode_header(ov['date'])
        parsed = email.utils.parsedate_tz(raw)
        if parsed is None:
            raise ValueError("unparseable date header: %r" % (raw,))
        return email.utils.mktime_tz(parsed)

    by_id = {}
    for num, ov in overviews:
        try:
            msgid = nntplib.decode_header(ov['message-id']).strip()
            refs = _parse_message_ids(nntplib.decode_header(ov['references']))
        except ValueError:
            continue

        by_id[msgid] = (num, msgid, ov)
        for ref in refs:
            # union: attach this message's root to the referenced root
            parent_of[find(msgid)] = find(ref)

    # Bucket the known messages by the root of their set.
    grouped = {}
    for msgid, entry in by_id.items():
        grouped.setdefault(find(msgid), []).append(entry)

    threads = []
    for members in grouped.values():
        # order by article number: lowest is the opener, highest the latest
        members.sort(key=lambda entry: entry[0])
        first = members[0][2]
        last = members[-1][2]
        try:
            thread = {
                'pinned': False,
                'title': nntplib.decode_header(first['subject']),
                'reply_count': len(members),
                'thread_id': nntplib.decode_header(first['message-id']),
                'author': nntplib.decode_header(first['from']),
                'created': timestamp(first),
                'last_author': nntplib.decode_header(last['from']),
                'last_mod': timestamp(last),
            }
        except (ValueError, KeyError, IndexError):
            continue
        threads.append(thread)

    return threads
|
||||||
|
|
||||||
|
|
||||||
def _overview_to_threads(overviews):
|
def _overview_to_threads(overviews):
|
||||||
|
# make every message its own thread, for prototyping purposes
|
||||||
|
#t = {
|
||||||
|
# 'title': str,
|
||||||
|
# 'reply_count': int, # does this include the OP?
|
||||||
|
# 'pinned': bool,
|
||||||
|
# 'thread_id': uuid
|
||||||
|
# 'author': user_uuid,
|
||||||
|
# 'created': time,
|
||||||
|
# 'last_mod': time,
|
||||||
|
# 'last_author': user_uuid,
|
||||||
|
#}
|
||||||
threads = []
|
threads = []
|
||||||
for num, ov in overviews:
|
for num, ov in overviews:
|
||||||
try:
|
try:
|
||||||
|
@ -782,7 +830,20 @@ def _test_overview_to_threads():
|
||||||
print(t)
|
print(t)
|
||||||
|
|
||||||
|
|
||||||
Address = namedtuple('Address', 'name, address')
|
_atext = r"[a-zA-Z0-9!#$%&'\*\+\-/=?^_`{|}~]" # RFC 5322 §3.2.3
|
||||||
|
_dotatext = r"%s+(?:\.%s+)*" % (_atext, _atext)
|
||||||
|
_mdtext = r"\[[!-=\?-Z^-~]\]"
|
||||||
|
_msg_id_re = re.compile(r'<%s@(?:%s|%s)>' % (_dotatext, _dotatext, _mdtext)) # RFC 5536 §3.1.3
|
||||||
|
|
||||||
|
def _parse_message_ids(s):
|
||||||
|
"""parses a list of message ids separated by junk"""
|
||||||
|
return _msg_id_re.findall(s)
|
||||||
|
|
||||||
|
class Address(namedtuple('Address', 'name, address')):
    """A `(name, address)` pair with a convenience accessor for the user part."""

    @property
    def user(self):
        """The portion of `address` before the first "@".

        If `address` contains no "@", the whole address is returned.
        """
        local_part = self.address.partition("@")[0]
        return local_part
|
||||||
|
|
||||||
def _parse_single_address(value):
|
def _parse_single_address(value):
|
||||||
# the email.headerregistry api is truly bizarre
|
# the email.headerregistry api is truly bizarre
|
||||||
|
|
Loading…
Reference in New Issue