moar nntp, parse thread overviews

magical 2022-08-08 00:19:43 +00:00
parent 3bc32e208f
commit 8acd97fa9b
1 changed files with 158 additions and 14 deletions

View File

@ -4,9 +4,11 @@ import nntplib
from hashlib import sha256
import time
import json
import ssl
import email.utils
from collections import namedtuple
class BBJ(object):
class BBJNews(object):
# this module isn't exactly complete. The below description claims
# `all of its endpoints are mapped to native methods` though this
# is not yet true. The documentation for the API is not yet
@ -72,14 +74,33 @@ class BBJ(object):
If you set this to False, anonymous network usage is
guaranteed.
"""
self.base = "http{}://{}:{}/api/%s".format("s" if https else "", host, port)
self.user_name = self.user_auth = None
self.host = host
self.port = port
self.tls = https
self.group = "tilde.team"
self.user_name = None
self.user_auth = None
self.send_auth = True
try:
self.user = self("get_me")["data"]
self.update_instance_info()
except URLError:
raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
self.user = {
"user_id": "",
"user_name": "",
"is_admin": False,
"color": 0,
}
self.instance_info = {
"instance_name": "",
"allow_anon": True,
"admins": [],
}
self.connect()
#try:
# self.user = self("get_me")["data"]
# self.update_instance_info()
#except URLError:
# raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
def __call__(self, *args, **kwargs):
@ -97,6 +118,17 @@ class BBJ(object):
return sha256(bytes(string, "utf8")).hexdigest()
def connect(self):
    """
    Open the NNTP connection and store it on self.conn.

    Connects to self.host on self.port. When self.tls is set, the
    connection is then upgraded with STARTTLS using a default SSL
    context.
    """
    self.conn = nntplib.NNTP(self.host, self.port)
    if not self.tls:
        return
    self.conn.starttls(ssl.create_default_context())
    # TODO: reconnect automatically
def request(self, endpoint, **params):
"""
Takes the string endpoint, and a variable number of kwargs
@ -251,6 +283,7 @@ class BBJ(object):
]
# AUTHINFO?
def set_credentials(self, user_name, user_auth, hash_auth=True, check_validity=True):
"""
Internalizes user_name and user_auth. Unless hash_auth=False is
@ -287,8 +320,7 @@ class BBJ(object):
hash_auth=False
)
"""
if hash_auth:
user_auth = self._hash(user_auth)
self.conn.login(user_name, user_auth, usernetrc=False)
if check_validity and not self.validate_credentials(user_name, user_auth):
self.user_auth = self.user_name = None
@ -300,7 +332,7 @@ class BBJ(object):
return True
# AUTHINFO?
# unused
def validate_credentials(self, user_name, user_auth, exception=True):
"""
Pings the server to check that user_name can be authenticated with
@ -440,8 +472,67 @@ class BBJ(object):
author_id = thread["author"]
print(usermap[author_id]["user_name"])
"""
response = self("thread_index", include_op=include_op)
return response["data"], response["usermap"]
response, count, first, last, name = self.conn.group(self.group)
# overviews is a list of (article_number, overview) tuples,
# one for each article.
#
# Each overview is a dictionary containing at least:
#
# subject, from, date,
# message-id, references - article headers
# :bytes - the number of bytes in the article
# :lines - the number of lines in the body (deprecated)
response, overviews = self.conn.over((first, None))
if False:
# build up a map of message references
# we use a disjoint-set data structure
# to find the root of each message
threadmap = {}
rank = {}
for num, ov in overviews:
msgid = nntplib.decode_header(ov['message-id'])
# RFC5536 suggests that whitespace should not occur inside
# a message id, which (if true) makes it pretty easy to split
# the list of message ids in the references header
refs = nntplib.decode_header(ov['references']).split()
for r in refs:
threadmap[msgid] = r
rank[msgid] = 1
# TODO
else:
# make every message its own thread, for prototyping purposes
#t = {
# 'title': str,
# 'reply_count': int, # does this include the OP?
# 'pinned': bool,
# 'thread_id': uuid
# 'author': user_uuid,
# 'created': time,
# 'last_mod': time,
# 'last_author': user_uuid,
#}
threads = _overview_to_threads(overviews)
# make usermap
usermap = {}
for num, ov in overviews:
userid = nntplib.decode_header(ov['from'])
if userid in usermap:
continue
addr = _parse_single_address(userid)
usermap[userid] = {
'user_id': userid,
'user_name': addr.name,
'address': addr.address,
'color': colorhash(userid),
'is_admin': False, # TODO: LIST MODERATORS?
}
return threads, usermap
def thread_load(self, thread_id, format=None, op_only=False):
@ -628,3 +719,56 @@ class BBJ(object):
"threads": response["data"]["threads"],
"messages": response["data"]["messages"]
}
def _overview_to_threads(overviews):
    """
    Convert NNTP overview data into a list of thread dictionaries.

    `overviews` is an iterable of (article_number, overview) pairs in
    which each overview is a mapping that provides at least the
    'subject', 'from', 'date' and 'message-id' headers.

    Every article becomes its own single-message thread: 'created' and
    'last_mod' are both the article's date as a UTC timestamp, 'author'
    and 'last_author' are both the decoded From header, and 'news_id'
    carries the article number.

    Articles whose headers cannot be decoded, or whose Date header
    cannot be parsed, are skipped rather than aborting the conversion.
    """
    threads = []
    for num, ov in overviews:
        try:
            parsed = email.utils.parsedate_tz(nntplib.decode_header(ov['date']))
            if parsed is None:
                # parsedate_tz() reports failure by returning None, not
                # by raising; handing None to mktime_tz() would raise a
                # TypeError that the handler below does not cover.
                continue
            created = email.utils.mktime_tz(parsed)
            author = nntplib.decode_header(ov['from'])
            thread = {
                'pinned': False,
                'title': nntplib.decode_header(ov['subject']),
                'reply_count': 1,
                'thread_id': nntplib.decode_header(ov['message-id']),
                'author': author,
                'created': created,
                'last_mod': created,
                'last_author': author,
                # nntp-specific fields
                'news_id': num,
            }
        except (ValueError, OverflowError):
            # undecodable header text, or a date that is syntactically
            # valid but out of range (e.g. an absurd year)
            continue
        threads.append(thread)
    return threads
def _test_overview_to_threads():
    """
    Manual smoke test: read overview lines from overview.txt, parse
    them with nntplib's overview parser, convert the result to threads
    and print each thread.
    """
    fields = [
        'subject', 'from', 'date', 'message-id',
        'references', ':bytes', ':lines', 'xref',
    ]
    with open("overview.txt") as src:
        raw_lines = src.readlines()
    parsed = nntplib._parse_overview(raw_lines, fields)
    for thread in _overview_to_threads(parsed):
        print(thread)
Address = namedtuple('Address', ['name', 'address'])


def _parse_single_address(value):
    """
    Split a single address header value into an Address(name, address).

    Raises ValueError (carrying the original value) when neither a
    display name nor an address could be extracted.
    """
    # email.headerregistry would need a class constructed on the fly
    # just to parse one value, and the get_mailbox function that does
    # exactly this lives in the internal _header_value_parser module,
    # which we probably shouldn't use. email.utils.parseaddr is the
    # simple public alternative.
    parsed = email.utils.parseaddr(value)
    display_name, mailbox = parsed
    if display_name == '' and mailbox == '':
        raise ValueError(value)
    return Address(display_name, mailbox)
def colorhash(s):
    """
    Map a value (normally a user id string) to a color index in 0..6.

    The built-in hash() is salted per interpreter process for str and
    bytes, so it would give the same user a different color on every
    run. A sha256 digest is used instead so the result is stable across
    processes and machines.
    """
    if isinstance(s, bytes):
        data = s
    else:
        # surrogatepass: header text decoded with surrogateescape may
        # contain lone surrogates, which a strict utf-8 encode rejects
        data = str(s).encode("utf-8", "surrogatepass")
    digest = sha256(data).digest()
    return int.from_bytes(digest[:8], "big") % 7