moar nntp, parse thread overviews
parent
3bc32e208f
commit
8acd97fa9b
|
@ -4,9 +4,11 @@ import nntplib
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
import ssl
|
||||||
|
import email.utils
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
class BBJNews(object):
|
||||||
class BBJ(object):
|
|
||||||
# this module isnt exactly complete. The below description claims
|
# this module isnt exactly complete. The below description claims
|
||||||
# `all of its endpoints are mapped to native methods` though this
|
# `all of its endpoints are mapped to native methods` though this
|
||||||
# is not yet true. The documentation for the API is not yet
|
# is not yet true. The documentation for the API is not yet
|
||||||
|
@ -72,14 +74,33 @@ class BBJ(object):
|
||||||
If you set this to False, anonymous network usage is
|
If you set this to False, anonymous network usage is
|
||||||
guaranteed.
|
guaranteed.
|
||||||
"""
|
"""
|
||||||
self.base = "http{}://{}:{}/api/%s".format("s" if https else "", host, port)
|
self.host = host
|
||||||
self.user_name = self.user_auth = None
|
self.port = port
|
||||||
|
self.tls = https
|
||||||
|
self.group = "tilde.team"
|
||||||
|
|
||||||
|
self.user_name = None
|
||||||
|
self.user_auth = None
|
||||||
self.send_auth = True
|
self.send_auth = True
|
||||||
try:
|
self.user = {
|
||||||
self.user = self("get_me")["data"]
|
"user_id": "",
|
||||||
self.update_instance_info()
|
"user_name": "",
|
||||||
except URLError:
|
"is_admin": False,
|
||||||
raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
|
"color": 0,
|
||||||
|
}
|
||||||
|
self.instance_info = {
|
||||||
|
"instance_name": "",
|
||||||
|
"allow_anon": True,
|
||||||
|
"admins": [],
|
||||||
|
}
|
||||||
|
|
||||||
|
self.connect()
|
||||||
|
|
||||||
|
#try:
|
||||||
|
# self.user = self("get_me")["data"]
|
||||||
|
# self.update_instance_info()
|
||||||
|
#except URLError:
|
||||||
|
# raise URLError("Cannot connect to %s (is the server down?)" % self.base[0:-2])
|
||||||
|
|
||||||
|
|
||||||
def __call__(self, *args, **kwargs):
|
def __call__(self, *args, **kwargs):
|
||||||
|
@ -97,6 +118,17 @@ class BBJ(object):
|
||||||
return sha256(bytes(string, "utf8")).hexdigest()
|
return sha256(bytes(string, "utf8")).hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def connect(self):
    """
    Open an NNTP connection to self.host on self.port and keep it
    on self.conn. When self.tls is set, the freshly opened
    connection is upgraded with STARTTLS using the default SSL
    context from the ssl module.
    """
    connection = nntplib.NNTP(self.host, self.port)
    # stored before the TLS upgrade, exactly as callers may expect
    # self.conn to exist even if the upgrade raises
    self.conn = connection
    if self.tls:
        connection.starttls(ssl.create_default_context())
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: reconnect automatically
|
||||||
|
|
||||||
|
|
||||||
def request(self, endpoint, **params):
|
def request(self, endpoint, **params):
|
||||||
"""
|
"""
|
||||||
Takes the string endpoint, and a variable number of kwargs
|
Takes the string endpoint, and a variable number of kwargs
|
||||||
|
@ -251,6 +283,7 @@ class BBJ(object):
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# AUTHINFO?
|
||||||
def set_credentials(self, user_name, user_auth, hash_auth=True, check_validity=True):
|
def set_credentials(self, user_name, user_auth, hash_auth=True, check_validity=True):
|
||||||
"""
|
"""
|
||||||
Internalizes user_name and user_auth. Unless hash_auth=False is
|
Internalizes user_name and user_auth. Unless hash_auth=False is
|
||||||
|
@ -287,8 +320,7 @@ class BBJ(object):
|
||||||
hash_auth=False
|
hash_auth=False
|
||||||
)
|
)
|
||||||
"""
|
"""
|
||||||
if hash_auth:
|
self.conn.login(user_name, user_auth, usernetrc=False)
|
||||||
user_auth = self._hash(user_auth)
|
|
||||||
|
|
||||||
if check_validity and not self.validate_credentials(user_name, user_auth):
|
if check_validity and not self.validate_credentials(user_name, user_auth):
|
||||||
self.user_auth = self.user_name = None
|
self.user_auth = self.user_name = None
|
||||||
|
@ -300,7 +332,7 @@ class BBJ(object):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
# AUTHINFO?
|
# unused
|
||||||
def validate_credentials(self, user_name, user_auth, exception=True):
|
def validate_credentials(self, user_name, user_auth, exception=True):
|
||||||
"""
|
"""
|
||||||
Pings the server to check that user_name can be authenticated with
|
Pings the server to check that user_name can be authenticated with
|
||||||
|
@ -440,8 +472,67 @@ class BBJ(object):
|
||||||
author_id = thread["author"]
|
author_id = thread["author"]
|
||||||
print(usermap[author_id]["user_name"])
|
print(usermap[author_id]["user_name"])
|
||||||
"""
|
"""
|
||||||
response = self("thread_index", include_op=include_op)
|
response, count, first, last, name = self.conn.group(self.group)
|
||||||
return response["data"], response["usermap"]
|
|
||||||
|
|
||||||
|
# overviews is a list of (article_number, overview) tuples,
|
||||||
|
# one for each article.
|
||||||
|
#
|
||||||
|
# Each overview is a dictionary containing at least:
|
||||||
|
#
|
||||||
|
# subject, from, date,
|
||||||
|
# message-id, references - article headers
|
||||||
|
# :bytes - the number of bytes in the article
|
||||||
|
# :lines - the number of lines in the body (deprecated)
|
||||||
|
|
||||||
|
response, overviews = self.conn.over((first, None))
|
||||||
|
|
||||||
|
if False:
|
||||||
|
# build up a map of message references
|
||||||
|
# we use a disjoint-set data structure
|
||||||
|
# to find the root of each message
|
||||||
|
threadmap = {}
|
||||||
|
rank = {}
|
||||||
|
for num, ov in overviews:
|
||||||
|
msgid = nntplib.decode_header(ov['message-id'])
|
||||||
|
# RFC5536 suggests that whitespace should not occur inside
|
||||||
|
# a message id, which (if true) makes it pretty easy to split
|
||||||
|
# the list of message ids in the references header
|
||||||
|
refs = nntplib.decode_header(ov['references']).split()
|
||||||
|
for r in refs:
|
||||||
|
threadmap[msgid] = r
|
||||||
|
rank[msgid] = 1
|
||||||
|
# TODO
|
||||||
|
else:
|
||||||
|
# make every message its own thread, for prototyping purposes
|
||||||
|
#t = {
|
||||||
|
# 'title': str,
|
||||||
|
# 'reply_count': int, # does this include the OP?
|
||||||
|
# 'pinned': bool,
|
||||||
|
# 'thread_id': uuid
|
||||||
|
# 'author': user_uuid,
|
||||||
|
# 'created': time,
|
||||||
|
# 'last_mod': time,
|
||||||
|
# 'last_author': user_uuid,
|
||||||
|
#}
|
||||||
|
threads = _overview_to_threads(overviews)
|
||||||
|
|
||||||
|
# make usermap
|
||||||
|
usermap = {}
|
||||||
|
for num, ov in overviews:
|
||||||
|
userid = nntplib.decode_header(ov['from'])
|
||||||
|
if userid in usermap:
|
||||||
|
continue
|
||||||
|
addr = _parse_single_address(userid)
|
||||||
|
usermap[userid] = {
|
||||||
|
'user_id': userid,
|
||||||
|
'user_name': addr.name,
|
||||||
|
'address': addr.address,
|
||||||
|
'color': colorhash(userid),
|
||||||
|
'is_admin': False, # TODO: LIST MODERATORS?
|
||||||
|
}
|
||||||
|
|
||||||
|
return threads, usermap
|
||||||
|
|
||||||
|
|
||||||
def thread_load(self, thread_id, format=None, op_only=False):
|
def thread_load(self, thread_id, format=None, op_only=False):
|
||||||
|
@ -628,3 +719,56 @@ class BBJ(object):
|
||||||
"threads": response["data"]["threads"],
|
"threads": response["data"]["threads"],
|
||||||
"messages": response["data"]["messages"]
|
"messages": response["data"]["messages"]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _overview_to_threads(overviews):
|
||||||
|
threads = []
|
||||||
|
for num, ov in overviews:
|
||||||
|
try:
|
||||||
|
d = nntplib.decode_header(ov['date'])
|
||||||
|
d = email.utils.mktime_tz(email.utils.parsedate_tz(d))
|
||||||
|
t = {
|
||||||
|
'pinned': False,
|
||||||
|
'title': nntplib.decode_header(ov['subject']),
|
||||||
|
'reply_count': 1,
|
||||||
|
'thread_id': nntplib.decode_header(ov['message-id']),
|
||||||
|
'author': nntplib.decode_header(ov['from']),
|
||||||
|
'created': d,
|
||||||
|
'last_mod': d,
|
||||||
|
'last_author': nntplib.decode_header(ov['from']),
|
||||||
|
# nntp-specific fields
|
||||||
|
'news_id': num,
|
||||||
|
}
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
threads.append(t)
|
||||||
|
return threads
|
||||||
|
|
||||||
|
def _test_overview_to_threads():
    """
    Manual smoke check: read raw overview lines from the file
    "overview.txt" in the current directory, parse them with
    nntplib's overview parser, convert them with
    _overview_to_threads and print every resulting thread.
    """
    fields = ['subject', 'from', 'date', 'message-id', 'references', ':bytes', ':lines', 'xref']
    with open("overview.txt") as handle:
        raw_lines = handle.readlines()
    # NOTE: _parse_overview is a private nntplib helper
    parsed = nntplib._parse_overview(raw_lines, fields)
    for thread in _overview_to_threads(parsed):
        print(thread)
|
||||||
|
|
||||||
|
|
||||||
|
Address = namedtuple('Address', 'name, address')
|
||||||
|
|
||||||
|
def _parse_single_address(value):
|
||||||
|
# the email.headerregistry api is truly bizarre
|
||||||
|
# and involves *constructing a class on the fly*
|
||||||
|
# to parse a simple value.
|
||||||
|
# there's a get_mailbox function that does exactly
|
||||||
|
# what we want but it's in the internal _header_value_parser
|
||||||
|
# module so we probably shoudn't use it.
|
||||||
|
name, addr = email.utils.parseaddr(value)
|
||||||
|
if name == '' and addr == '':
|
||||||
|
raise ValueError(value)
|
||||||
|
return Address(name, addr)
|
||||||
|
|
||||||
|
def colorhash(s):
    """
    Map a value (normally a user id string) onto a colour index in
    the range 0..6.

    The index is derived from a SHA-256 digest rather than the
    built-in hash(), because hash() of a str is salted per
    interpreter process and would hand the same user a different
    colour on every run.
    """
    # bytes are hashed as-is; anything else goes through its str form
    data = s if isinstance(s, bytes) else str(s).encode("utf8")
    return int(sha256(data).hexdigest(), 16) % 7
|
||||||
|
|
Loading…
Reference in New Issue