From 19269a1adec2dbe9a6daee87e961d122f0cfeccd Mon Sep 17 00:00:00 2001 From: magical Date: Mon, 8 Aug 2022 00:19:43 +0000 Subject: [PATCH] moar nntp, parse thread overviews --- clients/nntp_client.py | 172 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 158 insertions(+), 14 deletions(-) diff --git a/clients/nntp_client.py b/clients/nntp_client.py index 612e9f1..bb8e51c 100644 --- a/clients/nntp_client.py +++ b/clients/nntp_client.py @@ -4,9 +4,11 @@ import nntplib from hashlib import sha256 import time import json +import ssl +import email.utils +from collections import namedtuple - -class BBJ(object): +class BBJNews(object): # this module isnt exactly complete. The below description claims # `all of its endpoints are mapped to native methods` though this # is not yet true. The documentation for the API is not yet @@ -72,14 +74,33 @@ class BBJ(object): If you set this to False, anonymous network usage is guaranteed. """ - self.base = "http{}://{}:{}/api/%s".format("s" if https else "", host, port) - self.user_name = self.user_auth = None + self.host = host + self.port = port + self.tls = https + self.group = "tilde.team" + + self.user_name = None + self.user_auth = None self.send_auth = True - try: - self.user = self("get_me")["data"] - self.update_instance_info() - except URLError: - raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2]) + self.user = { + "user_id": "", + "user_name": "", + "is_admin": False, + "color": 0, + } + self.instance_info = { + "instance_name": "", + "allow_anon": True, + "admins": [], + } + + self.connect() + + #try: + # self.user = self("get_me")["data"] + # self.update_instance_info() + #except URLError: + # raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2]) def __call__(self, *args, **kwargs): @@ -97,6 +118,17 @@ class BBJ(object): return sha256(bytes(string, "utf8")).hexdigest() + + def connect(self): + self.conn = nntplib.NNTP(self.host, self.port) + if self.tls: + context = ssl.create_default_context() + self.conn.starttls(context) + + + # TODO: reconnect automatically + + def request(self, endpoint, **params): """ Takes the string endpoint, and a variable number of kwargs @@ -251,6 +283,7 @@ class BBJ(object): ] + # AUTHINFO? def set_credentials(self, user_name, user_auth, hash_auth=True, check_validity=True): """ Internalizes user_name and user_auth. Unless hash_auth=False is @@ -287,8 +320,7 @@ class BBJ(object): hash_auth=False ) """ - if hash_auth: - user_auth = self._hash(user_auth) + self.conn.login(user_name, user_auth, usernetrc=False) if check_validity and not self.validate_credentials(user_name, user_auth): self.user_auth = self.user_name = None @@ -300,7 +332,7 @@ class BBJ(object): return True - # AUTHINFO? + # unused def validate_credentials(self, user_name, user_auth, exception=True): """ Pings the server to check that user_name can be authenticated with @@ -440,8 +472,67 @@ class BBJ(object): author_id = thread["author"] print(usermap[author_id]["user_name"]) """ - response = self("thread_index", include_op=include_op) - return response["data"], response["usermap"] + response, count, first, last, name = self.conn.group(self.group) + + + # overviews is a list of (article_number, overview) tuples, + # one for each article. + # + # Each overview is a dictionary containing at least: + # + # subject, from, date, + # message-id, references - article headers + # :bytes - the number of bytes in the article + # :lines - the number of lines in the body (deprecated) + + response, overviews = self.conn.over((first, None)) + + if False: + # build up a map of message references + # we use a disjoint-set data structure + # to find the root of each message + threadmap = {} + rank = {} + for num, ov in overviews: + msgid = nntplib.decode_header(ov['message-id']) + # RFC5536 suggests that whitespace should not occur inside + # a message id, which (if true) makes it pretty easy to split + # the list of message ids in the references header + refs = nntplib.decode_header(ov['references']).split() + for r in refs: + threadmap[msgid] = r + rank[msgid] = 1 + # TODO + else: + # make every message its own thread, for prototyping purposes + #t = { + # 'title': str, + # 'reply_count': int, # does this include the OP? + # 'pinned': bool, + # 'thread_id': uuid + # 'author': user_uuid, + # 'created': time, + # 'last_mod': time, + # 'last_author': user_uuid, + #} + threads = _overview_to_threads(overviews) + + # make usermap + usermap = {} + for num, ov in overviews: + userid = nntplib.decode_header(ov['from']) + if userid in usermap: + continue + addr = _parse_single_address(userid) + usermap[userid] = { + 'user_id': userid, + 'user_name': addr.name, + 'address': addr.address, + 'color': colorhash(userid), + 'is_admin': False, # TODO: LIST MODERATORS? + } + + return threads, usermap def thread_load(self, thread_id, format=None, op_only=False): @@ -628,3 +719,56 @@ class BBJ(object): "threads": response["data"]["threads"], "messages": response["data"]["messages"] } + + +def _overview_to_threads(overviews): + threads = [] + for num, ov in overviews: + try: + d = nntplib.decode_header(ov['date']) + d = email.utils.mktime_tz(email.utils.parsedate_tz(d)) + t = { + 'pinned': False, + 'title': nntplib.decode_header(ov['subject']), + 'reply_count': 1, + 'thread_id': nntplib.decode_header(ov['message-id']), + 'author': nntplib.decode_header(ov['from']), + 'created': d, + 'last_mod': d, + 'last_author': nntplib.decode_header(ov['from']), + # nntp-specific fields + 'news_id': num, + } + except ValueError: + continue + else: + threads.append(t) + return threads + +def _test_overview_to_threads(): + with open("overview.txt") as f: + lines = f.readlines() + fmt = ['subject', 'from', 'date', 'message-id', 'references', ':bytes', ':lines', 'xref'] + overviews = nntplib._parse_overview(lines, fmt) + threads = _overview_to_threads(overviews) + for t in threads: + print(t) + + +Address = namedtuple('Address', 'name, address') + +def _parse_single_address(value): + # the email.headerregistry api is truly bizarre + # and involves *constructing a class on the fly* + # to parse a simple value. + # there's a get_mailbox function that does exactly + # what we want but it's in the internal _header_value_parser + # module so we probably shoudn't use it. + name, addr = email.utils.parseaddr(value) + if name == '' and addr == '': + raise ValueError(value) + return Address(name, addr) + +def colorhash(s): + h = hash(s) + return h % 7