moar nntp, parse thread overviews
parent
3bc32e208f
commit
8acd97fa9b
|
@ -4,9 +4,11 @@ import nntplib
|
|||
from hashlib import sha256
|
||||
import time
|
||||
import json
|
||||
import ssl
|
||||
import email.utils
|
||||
from collections import namedtuple
|
||||
|
||||
|
||||
class BBJ(object):
|
||||
class BBJNews(object):
|
||||
# this module isn't exactly complete. The below description claims
|
||||
# `all of its endpoints are mapped to native methods` though this
|
||||
# is not yet true. The documentation for the API is not yet
|
||||
|
@ -72,14 +74,33 @@ class BBJ(object):
|
|||
If you set this to False, anonymous network usage is
|
||||
guaranteed.
|
||||
"""
|
||||
self.base = "http{}://{}:{}/api/%s".format("s" if https else "", host, port)
|
||||
self.user_name = self.user_auth = None
|
||||
self.host = host
|
||||
self.port = port
|
||||
self.tls = https
|
||||
self.group = "tilde.team"
|
||||
|
||||
self.user_name = None
|
||||
self.user_auth = None
|
||||
self.send_auth = True
|
||||
try:
|
||||
self.user = self("get_me")["data"]
|
||||
self.update_instance_info()
|
||||
except URLError:
|
||||
raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
|
||||
self.user = {
|
||||
"user_id": "",
|
||||
"user_name": "",
|
||||
"is_admin": False,
|
||||
"color": 0,
|
||||
}
|
||||
self.instance_info = {
|
||||
"instance_name": "",
|
||||
"allow_anon": True,
|
||||
"admins": [],
|
||||
}
|
||||
|
||||
self.connect()
|
||||
|
||||
#try:
|
||||
# self.user = self("get_me")["data"]
|
||||
# self.update_instance_info()
|
||||
#except URLError:
|
||||
# raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
|
||||
|
||||
|
||||
def __call__(self, *args, **kwargs):
|
||||
|
@ -97,6 +118,17 @@ class BBJ(object):
|
|||
return sha256(bytes(string, "utf8")).hexdigest()
|
||||
|
||||
|
||||
|
||||
def connect(self):
    """
    Open an NNTP session to self.host on self.port and keep it
    in self.conn.

    When self.tls is truthy, the freshly opened session is upgraded
    with STARTTLS using a default SSL context.
    """
    self.conn = nntplib.NNTP(self.host, self.port)
    if not self.tls:
        return
    self.conn.starttls(ssl.create_default_context())
|
||||
|
||||
|
||||
# TODO: reconnect automatically
|
||||
|
||||
|
||||
def request(self, endpoint, **params):
|
||||
"""
|
||||
Takes the string endpoint, and a variable number of kwargs
|
||||
|
@ -251,6 +283,7 @@ class BBJ(object):
|
|||
]
|
||||
|
||||
|
||||
# AUTHINFO?
|
||||
def set_credentials(self, user_name, user_auth, hash_auth=True, check_validity=True):
|
||||
"""
|
||||
Internalizes user_name and user_auth. Unless hash_auth=False is
|
||||
|
@ -287,8 +320,7 @@ class BBJ(object):
|
|||
hash_auth=False
|
||||
)
|
||||
"""
|
||||
if hash_auth:
|
||||
user_auth = self._hash(user_auth)
|
||||
self.conn.login(user_name, user_auth, usernetrc=False)
|
||||
|
||||
if check_validity and not self.validate_credentials(user_name, user_auth):
|
||||
self.user_auth = self.user_name = None
|
||||
|
@ -300,7 +332,7 @@ class BBJ(object):
|
|||
return True
|
||||
|
||||
|
||||
# AUTHINFO?
|
||||
# unused
|
||||
def validate_credentials(self, user_name, user_auth, exception=True):
|
||||
"""
|
||||
Pings the server to check that user_name can be authenticated with
|
||||
|
@ -440,8 +472,67 @@ class BBJ(object):
|
|||
author_id = thread["author"]
|
||||
print(usermap[author_id]["user_name"])
|
||||
"""
|
||||
response = self("thread_index", include_op=include_op)
|
||||
return response["data"], response["usermap"]
|
||||
response, count, first, last, name = self.conn.group(self.group)
|
||||
|
||||
|
||||
# overviews is a list of (article_number, overview) tuples,
|
||||
# one for each article.
|
||||
#
|
||||
# Each overview is a dictionary containing at least:
|
||||
#
|
||||
# subject, from, date,
|
||||
# message-id, references - article headers
|
||||
# :bytes - the number of bytes in the article
|
||||
# :lines - the number of lines in the body (deprecated)
|
||||
|
||||
response, overviews = self.conn.over((first, None))
|
||||
|
||||
if False:
|
||||
# build up a map of message references
|
||||
# we use a disjoint-set data structure
|
||||
# to find the root of each message
|
||||
threadmap = {}
|
||||
rank = {}
|
||||
for num, ov in overviews:
|
||||
msgid = nntplib.decode_header(ov['message-id'])
|
||||
# RFC5536 suggests that whitespace should not occur inside
|
||||
# a message id, which (if true) makes it pretty easy to split
|
||||
# the list of message ids in the references header
|
||||
refs = nntplib.decode_header(ov['references']).split()
|
||||
for r in refs:
|
||||
threadmap[msgid] = r
|
||||
rank[msgid] = 1
|
||||
# TODO
|
||||
else:
|
||||
# make every message its own thread, for prototyping purposes
|
||||
#t = {
|
||||
# 'title': str,
|
||||
# 'reply_count': int, # does this include the OP?
|
||||
# 'pinned': bool,
|
||||
# 'thread_id': uuid
|
||||
# 'author': user_uuid,
|
||||
# 'created': time,
|
||||
# 'last_mod': time,
|
||||
# 'last_author': user_uuid,
|
||||
#}
|
||||
threads = _overview_to_threads(overviews)
|
||||
|
||||
# make usermap
|
||||
usermap = {}
|
||||
for num, ov in overviews:
|
||||
userid = nntplib.decode_header(ov['from'])
|
||||
if userid in usermap:
|
||||
continue
|
||||
addr = _parse_single_address(userid)
|
||||
usermap[userid] = {
|
||||
'user_id': userid,
|
||||
'user_name': addr.name,
|
||||
'address': addr.address,
|
||||
'color': colorhash(userid),
|
||||
'is_admin': False, # TODO: LIST MODERATORS?
|
||||
}
|
||||
|
||||
return threads, usermap
|
||||
|
||||
|
||||
def thread_load(self, thread_id, format=None, op_only=False):
|
||||
|
@ -628,3 +719,56 @@ class BBJ(object):
|
|||
"threads": response["data"]["threads"],
|
||||
"messages": response["data"]["messages"]
|
||||
}
|
||||
|
||||
|
||||
def _overview_to_threads(overviews):
    """
    Convert NNTP overview entries into thread dictionaries.

    `overviews` is an iterable of (article_number, overview) pairs,
    where each overview maps the header names 'subject', 'from',
    'date' and 'message-id' to their raw header values. Every
    article becomes its own single-message thread.

    Articles whose headers fail to decode (ValueError), or whose
    date header is empty, unparseable or out of range, are skipped
    so that one malformed article cannot abort the whole listing.

    Returns a list of thread dicts, in input order, with the keys
    pinned, title, reply_count, thread_id, author, created,
    last_mod, last_author and news_id.
    """
    threads = []
    for num, ov in overviews:
        try:
            date_header = nntplib.decode_header(ov['date'])
            title = nntplib.decode_header(ov['subject'])
            message_id = nntplib.decode_header(ov['message-id'])
            author = nntplib.decode_header(ov['from'])
        except ValueError:
            continue

        # parsedate_tz reports failure by returning None rather than
        # raising; passing that None on to mktime_tz would raise a
        # TypeError, so it has to be checked explicitly.
        parsed = email.utils.parsedate_tz(date_header)
        if parsed is None:
            continue
        try:
            timestamp = email.utils.mktime_tz(parsed)
        except (ValueError, OverflowError):
            # syntactically valid date that cannot be converted
            continue

        threads.append({
            'pinned': False,
            'title': title,
            'reply_count': 1,
            'thread_id': message_id,
            'author': author,
            'created': timestamp,
            'last_mod': timestamp,
            'last_author': author,
            # nntp-specific fields
            'news_id': num,
        })
    return threads
|
||||
|
||||
def _test_overview_to_threads():
    """
    Manual smoke test: parse the overview lines stored in
    "overview.txt" and print every thread dict built from them.
    """
    field_names = ['subject', 'from', 'date', 'message-id', 'references', ':bytes', ':lines', 'xref']
    with open("overview.txt") as handle:
        raw_lines = list(handle)
    parsed = nntplib._parse_overview(raw_lines, field_names)
    for thread in _overview_to_threads(parsed):
        print(thread)
|
||||
|
||||
|
||||
# (display name, email address) pair produced by _parse_single_address
Address = namedtuple('Address', ['name', 'address'])


def _parse_single_address(value):
    """
    Split one address header value into an Address(name, address).

    Raises ValueError, carrying the original value, when
    email.utils.parseaddr yields both an empty name and an empty
    address.
    """
    # email.headerregistry is an odd fit here: it wants a class
    # constructed on the fly just to parse one value. The get_mailbox
    # function does exactly what we want, but it lives in the internal
    # _header_value_parser module, so we probably shouldn't use it.
    display_name, mailbox = email.utils.parseaddr(value)
    if display_name or mailbox:
        return Address(display_name, mailbox)
    raise ValueError(value)
|
||||
|
||||
def colorhash(s):
    """
    Map a value to a color index in range(7).

    The index is derived from a CRC32 of the value's bytes, so the
    same input yields the same color in every interpreter process.
    The built-in hash() is not suitable for this: for str and bytes
    it is salted per process, which made colors change between runs.

    `s` is normally a str; bytes are hashed as-is, and any other
    object is hashed through its str() form.
    """
    import zlib

    if isinstance(s, (bytes, bytearray)):
        data = bytes(s)
    else:
        # surrogatepass keeps undecodable header bytes from raising
        data = str(s).encode("utf-8", "surrogatepass")
    return zlib.crc32(data) % 7
|
||||
|
|
Loading…
Reference in New Issue