tdp.py->stats.py
parent
470bef5f05
commit
65e1ec1a5d
204
scripts/tdp.py
204
scripts/tdp.py
|
@ -1,204 +0,0 @@
|
||||||
#!/usr/local/bin/python3
|
|
||||||
|
|
||||||
# tdp.py - tilde.town data in Tilde Description Protocol (~dp) format.
|
|
||||||
# Copyright 2015 Michael F. Lamb <http://datagrok.org>
|
|
||||||
# License: GPLv3+
|
|
||||||
|
|
||||||
"""
|
|
||||||
Outputs JSON data conforming to "~dp (Tilde Description Protocol)" as defined
|
|
||||||
at: http://protocol.club/~datagrok/beta-wiki/tdp.html
|
|
||||||
|
|
||||||
It is a JSON structure of the form:
|
|
||||||
|
|
||||||
{
|
|
||||||
'name': (string) the name of the server.
|
|
||||||
'url': (string) the URL of the server.
|
|
||||||
'signup_url': (string) the URL of a page describing the process required to request an account on the server.
|
|
||||||
'user_count': (number) the number of users currently registered on the server.
|
|
||||||
'want_users': (boolean) whether the server is currently accepting new user requests.
|
|
||||||
'admin_email': (string) the email address of the primary server administrator.
|
|
||||||
'description': (string) a free-form description for the server.
|
|
||||||
'users': [ (array) an array of users on the server.
|
|
||||||
{
|
|
||||||
'username': (string) the username of the user.
|
|
||||||
'title': (string) the HTML title of the user’s index.html page.
|
|
||||||
'mtime': (number) a timestamp representing the last time the user’s index.html was modified.
|
|
||||||
},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
We also overload this with the preexisting data format we were using in
|
|
||||||
/var/local/tildetown/tildetown-py/stats.py, which is of the form:
|
|
||||||
|
|
||||||
{
|
|
||||||
'all_users': [ (array) of users on the server.
|
|
||||||
{
|
|
||||||
'username': (string) the username of the user.
|
|
||||||
'default': (boolean) Is the user still using their unmodified default index.html?
|
|
||||||
'favicon': (string) a url to an image representing the user
|
|
||||||
},
|
|
||||||
...
|
|
||||||
]
|
|
||||||
'num_users': (number) count of all_users
|
|
||||||
'live_users': [ (array) an array of live users, same format as all_users. Users may appear in both arrays.
|
|
||||||
...
|
|
||||||
],
|
|
||||||
'num_live_users': (number) count of live users
|
|
||||||
'active_user_count': (number) count of currently logged in users
|
|
||||||
'generated_at': (string) the time this JSON was generated in '%Y-%m-%d %H:%M:%S' format.
|
|
||||||
'generated_at_msec': (number) the time this JSON was generated, in milliseconds since the epoch.
|
|
||||||
'site_name': (same as 'name' above)
|
|
||||||
'site_url': (same as 'url' above)
|
|
||||||
'uptime': (string) output of `uptime -p`
|
|
||||||
|
|
||||||
}
|
|
||||||
Usage: tdp.py > /var/www/html/tilde.json
|
|
||||||
"""
|
|
||||||
|
|
||||||
# I suppose I could import /var/local/tildetown/tildetown-py/stats.py which
|
|
||||||
# does much of the same work, but I wanted to try to make one that needs no
|
|
||||||
# venv nor 'sh' module. (Success.) Bonus: this runs in 0.127s, vs 5.2s
|
|
||||||
# for 'stats'
|
|
||||||
|
|
||||||
# FIXME: unlike stats.py, we calculate last modified only on index.html.
|
|
||||||
|
|
||||||
# FIXME: we output quite a bit of redundant data. I think we should lose
|
|
||||||
# 'live_users' and do that filtering on the client side.
|
|
||||||
|
|
||||||
# FIXME: If we're the only consumer of the stats.py data, let's change the
|
|
||||||
# client side to use 'users' and drop 'all_users'.
|
|
||||||
|
|
||||||
import datetime
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
import os
|
|
||||||
import pwd
|
|
||||||
import re
|
|
||||||
import struct
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
# Captures the text between the first <title ...> tag and the nearest closing
# </title>. The group is non-greedy so that a later </title> (for example one
# inside an inline <svg>) is not swallowed into the page title, and the match
# is case-insensitive because HTML tag names are. DOTALL lets a title span
# several lines; \b stops tags that merely start with "title" from matching.
title_re = re.compile(r'<title\b[^>]*>(.*?)</title>', re.DOTALL | re.IGNORECASE)

# md5 hex digest of the skeleton index.html. It stays None until main() fills
# it in, which happens before any user page is compared against it.
defaultindex_hash = None
|
|
||||||
|
|
||||||
# modified from https://gist.github.com/likexian/f9da722585036d372dca
|
|
||||||
# Binary layout of one utmp/wtmp record, unpacked with native alignment.
XTMP_STRUCT_FMT = 'hi32s4s32s256shhiii4i20x'
XTMP_STRUCT_SIZE = struct.calcsize(XTMP_STRUCT_FMT)
# Names paired positionally with the unpacked values.
# NOTE: the '4i' in the format yields four integers but only two names are
# left at that point, so 'addr_v6' receives the first word, 'unused' the
# second, and zip() drops the remaining two.
XTMP_STRUCT_KEYS = [
    'type', 'pid', 'line', 'id', 'user', 'host', 'e_termination', 'e_exit',
    'session', 'sec', 'usec', 'addr_v6', 'unused',
]


def _xtmp_text(raw):
    """Decode a NUL-padded bytes field, keeping only what precedes the NUL."""
    # Cut at the terminator BEFORE decoding: whatever follows the first NUL
    # may be stale garbage that is not valid UTF-8.
    return raw.partition(b'\x00')[0].decode('UTF-8', errors='replace')


def read_xtmp(filename):
    """Pure-python replacement for who(1) and w(1); parses the data structure
    in /var/run/utmp or /var/run/wtmp, generating a dict for each entry. See
    man 5 utmp for meaning of fields.

    Each dict is keyed by XTMP_STRUCT_KEYS. Bytes fields are returned as str,
    truncated at the first NUL; numeric fields are returned unchanged. An
    incomplete record at the end of the file is ignored instead of raising
    struct.error.

    Raises OSError if filename cannot be opened.
    """
    # This was fun but probably not worth the trouble, since we end up having
    # to use subprocess.check_output() elsewhere anyway.
    with open(filename, 'rb') as fp:
        while True:
            entry = fp.read(XTMP_STRUCT_SIZE)
            if len(entry) < XTMP_STRUCT_SIZE:
                # End of file, or a truncated trailing record.
                return
            values = struct.unpack(XTMP_STRUCT_FMT, entry)
            yield dict(zip(
                XTMP_STRUCT_KEYS,
                (_xtmp_text(v) if isinstance(v, bytes) else v for v in values)))
|
|
||||||
|
|
||||||
def active_user_count():
    """Count the distinct usernames that currently hold a login session.

    Scans the records in /var/run/utmp and considers only those whose type
    field equals 7.
    """
    logged_in = set()
    for record in read_xtmp('/var/run/utmp'):
        # 7 is the USER_PROCESS record type in utmp(5), i.e. a normal login.
        if record['type'] == 7:
            logged_in.add(record['user'])
    return len(logged_in)
|
|
||||||
|
|
||||||
def md5sum(filename):
    """Return the md5 digest of the file's contents as a hexadecimal string."""
    digest = hashlib.md5()
    with open(filename, 'rb') as stream:
        # Feed the hash 4 KiB at a time so the whole file is never held in
        # memory at once.
        while True:
            chunk = stream.read(4096)
            if not chunk:
                break
            digest.update(chunk)
    return digest.hexdigest()
|
|
||||||
|
|
||||||
def get_title(indexhtml):
    """Given an html file, return the content of its <title>.

    The file is read as text with undecodable bytes ignored. Returns None
    when no <title> element is found. Raises OSError if the file cannot be
    opened.
    """
    # Nothing is printed here: stdout carries the JSON document, so any stray
    # diagnostic output would corrupt it. The with-block also guarantees the
    # handle is closed once the content has been read.
    with open(indexhtml, 'rt', errors='ignore') as fp:
        match = title_re.search(fp.read())
    return match.group(1) if match else None
|
|
||||||
|
|
||||||
def get_users():
    """Produce (username, homedir) pairs for the ordinary accounts on this
    host.

    An account is ordinary when its uid is at least 1000, its login shell is
    not /bin/false, and its name is not one of the excluded service accounts.
    The result is a lazy generator over the password database entries.
    """
    excluded = ['nobody', 'ubuntu', 'poetry']

    def is_normal(entry):
        if entry.pw_uid < 1000:
            return False
        if entry.pw_shell == '/bin/false':
            return False
        return entry.pw_name not in excluded

    return ((entry.pw_name, entry.pw_dir)
            for entry in pwd.getpwall() if is_normal(entry))
|
|
||||||
|
|
||||||
def tdp_user(username, homedir):
    """Given a unix username, and their home directory, return a TDP format
    dict with information about that user.

    The dict always has the keys 'username', 'title', 'mtime', 'default' and
    'favicon'. 'mtime' is the modification time of public_html/index.html in
    milliseconds since the epoch.

    If index.html is missing or unreadable, the user is still reported, with
    'title' None, 'mtime' 0 and 'default' False, so that one such account
    cannot abort the whole report.
    """
    indexhtml = os.path.join(homedir, 'public_html', 'index.html')
    try:
        mtime = int(os.path.getmtime(indexhtml) * 1000)
        title = get_title(indexhtml)
        # FIXME: just shelling out to diff -q might be way faster than all
        # these hashes.
        is_default = md5sum(indexhtml) == defaultindex_hash
    except OSError:
        # No page (or no permission to read it): there is nothing to compare
        # against the skeleton, so the page cannot be the unmodified default.
        title, mtime, is_default = None, 0, False
    return {
        'username': username,
        'title': title,
        'mtime': mtime,
        # tilde.town extensions and backward compatibility
        'default': is_default,
        'favicon': 'TODO',
    }
|
|
||||||
|
|
||||||
def tdp():
    """Build the complete site description as a JSON-serializable dict.

    The result combines three groups of keys:
      * the TDP fields ('name', 'url', 'signup_url', 'want_users',
        'admin_email', 'description', 'user_count', 'users');
      * tilde.town extensions ('active_user_count', 'generated_at',
        'generated_at_msec', 'uptime');
      * redundant aliases kept for the existing homepage template
        ('all_users', 'num_users', 'live_users', 'num_live_users',
        'site_name', 'site_url').

    Raises subprocess.CalledProcessError if `uptime -p` exits non-zero.
    """
    now = datetime.datetime.now()
    users = [tdp_user(username, homedir) for username, homedir in get_users()]

    # split()/join collapses the literal's line breaks and indentation into
    # single spaces without leaving a stray space at either end.
    description = " ".join("""
        an intentional digital community for creating and sharing works of
        art, educating peers, and technological anachronism. we are a
        completely non-commercial, donation supported, and committed to
        rejecting false technological progress in favor of empathy and
        sustainable computing.
        """.split())

    # TDP format data
    data = {
        'name': 'tilde.town',
        'url': 'http://tilde.town',
        'signup_url': 'http://goo.gl/forms/8IvQFTDjlo',
        'want_users': True,
        'admin_email': 'nks@lambdaphil.es',
        'description': description,
        'user_count': len(users),
        'users': users,
    }

    # tilde.town extensions and backward compatibility
    data.update({
        'active_user_count': active_user_count(),
        'generated_at': now.strftime('%Y-%m-%d %H:%M:%S'),
        'generated_at_msec': int(now.timestamp() * 1000),
        # check_output() returns the command's trailing newline; drop it so
        # the value is just the uptime text.
        'uptime': subprocess.check_output(
            ['uptime', '-p'], universal_newlines=True).strip(),
    })

    # redundant entries we should drop after changing homepage template
    live_users = [u for u in users if not u['default']]
    data.update({
        'all_users': data['users'],
        'num_users': data['user_count'],
        'live_users': live_users,
        'num_live_users': len(live_users),
        'site_name': data['name'],
        'site_url': data['url'],
    })

    return data
|
|
||||||
|
|
||||||
def main():
    """Script entry point: write the site description to stdout as JSON.

    Records the md5 of the skeleton index.html in the module-level
    defaultindex_hash first, so that user pages can be compared against it
    while the report is built. Returns None.
    """
    global defaultindex_hash
    defaultindex_hash = md5sum("/etc/skel/public_html/index.html")
    report = tdp()
    print(json.dumps(report, indent=2, sort_keys=True))


if __name__ == '__main__':
    # main() returns None, which SystemExit treats as a successful exit.
    raise SystemExit(main())
|
|
|
@ -1,87 +1,198 @@
|
||||||
|
#!/usr/local/bin/python3
|
||||||
|
|
||||||
|
# tdp.py - tilde.town data in Tilde Description Protocol (~dp) format.
|
||||||
|
# Copyright 2015 Michael F. Lamb <http://datagrok.org>
|
||||||
|
# License: GPLv3+
|
||||||
|
|
||||||
|
"""
|
||||||
|
Outputs JSON data conforming to "~dp (Tilde Description Protocol)" as defined
|
||||||
|
at: http://protocol.club/~datagrok/beta-wiki/tdp.html
|
||||||
|
|
||||||
|
It is a JSON structure of the form:
|
||||||
|
|
||||||
|
{
|
||||||
|
'name': (string) the name of the server.
|
||||||
|
'url': (string) the URL of the server.
|
||||||
|
'signup_url': (string) the URL of a page describing the process required to request an account on the server.
|
||||||
|
'user_count': (number) the number of users currently registered on the server.
|
||||||
|
'want_users': (boolean) whether the server is currently accepting new user requests.
|
||||||
|
'admin_email': (string) the email address of the primary server administrator.
|
||||||
|
'description': (string) a free-form description for the server.
|
||||||
|
'users': [ (array) an array of users on the server.
|
||||||
|
{
|
||||||
|
'username': (string) the username of the user.
|
||||||
|
'title': (string) the HTML title of the user’s index.html page.
|
||||||
|
'mtime': (number) a timestamp representing the last time the user’s index.html was modified.
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
We also overload this with the preexisting data format we were using in
|
||||||
|
/var/local/tildetown/tildetown-py/stats.py, which is of the form:
|
||||||
|
|
||||||
|
{
|
||||||
|
'all_users': [ (array) of users on the server.
|
||||||
|
{
|
||||||
|
'username': (string) the username of the user.
|
||||||
|
'default': (boolean) Is the user still using their unmodified default index.html?
|
||||||
|
'favicon': (string) a url to an image representing the user
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
'num_users': (number) count of all_users
|
||||||
|
'live_users': [ (array) an array of live users, same format as all_users. Users may appear in both arrays.
|
||||||
|
...
|
||||||
|
],
|
||||||
|
'num_live_users': (number) count of live users
|
||||||
|
'active_user_count': (number) count of currently logged in users
|
||||||
|
'generated_at': (string) the time this JSON was generated in '%Y-%m-%d %H:%M:%S' format.
|
||||||
|
'generated_at_msec': (number) the time this JSON was generated, in milliseconds since the epoch.
|
||||||
|
'site_name': (same as 'name' above)
|
||||||
|
'site_url': (same as 'url' above)
|
||||||
|
'uptime': (string) output of `uptime -p`
|
||||||
|
|
||||||
|
}
|
||||||
|
Usage: tdp.py > /var/www/html/tilde.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
# I suppose I could import /var/local/tildetown/tildetown-py/stats.py which
|
||||||
|
# does much of the same work, but I wanted to try to make one that needs no
|
||||||
|
# venv nor 'sh' module. (Success.) Bonus: this runs in 0.127s, vs 5.2s
|
||||||
|
# for 'stats'
|
||||||
|
|
||||||
|
# FIXME: we output quite a bit of redundant data. I think we should lose
|
||||||
|
# 'live_users' and do that filtering on the client side.
|
||||||
|
|
||||||
|
# FIXME: If we're the only consumer of the stats.py data, let's change the
|
||||||
|
# client side to use 'users' and drop 'all_users'.
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
from functools import partial
|
import os
|
||||||
from os import listdir
|
import pwd
|
||||||
from os.path import getmtime, join
|
import re
|
||||||
from datetime import datetime
|
import struct
|
||||||
from sh import find, uptime, who, sort, wc, cut
|
|
||||||
from tildetown.util import slurp, thread, p
|
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
# this script emits json on standard out that has information about tilde.town
|
SYSTEM_USERS = ['wiki', 'root', 'ubuntu', 'nate', 'nobody']
|
||||||
# users. It denotes who has not updated their page from the default. It also
|
|
||||||
# reports the time this script was run. The user list is sorted by public_html update time.
|
|
||||||
|
|
||||||
SYSTEM_USERS = ['wiki', 'root', 'ubuntu', 'nate']
|
|
||||||
|
|
||||||
DEFAULT_HTML_FILENAME = "/etc/skel/public_html/index.html"
|
DEFAULT_HTML_FILENAME = "/etc/skel/public_html/index.html"
|
||||||
|
title_re = re.compile(r'<title[^>]*>(.*)</title>', re.DOTALL)
|
||||||
|
|
||||||
username_to_html_path = lambda u: "/home/{}/public_html".format(u)
|
def active_user_count():
    """Return how many distinct usernames who(1) lists as logged in."""
    listing = subprocess.check_output(["who"], universal_newlines=True)
    # The first whitespace-separated column of every who(1) line is the
    # login name; a set collapses users with several sessions.
    names = {row.split()[0] for row in listing.splitlines()}
    return len(names)
|
||||||
|
|
||||||
def default_p(username):
|
def get_title(indexhtml):
|
||||||
return subprocess.call(
|
"""Given an html file, return the content of its <title>"""
|
||||||
['diff', '-q', DEFAULT_HTML_FILENAME, user_html_filename],
|
with open(indexhtml, 'rt', errors='ignore') as fp:
|
||||||
stdout=subprocess.DEVNULL) == 0
|
title = title_re.search(fp.read())
|
||||||
|
if title:
|
||||||
|
return title.group(1)
|
||||||
|
|
||||||
def bounded_find(path):
|
def get_users():
|
||||||
# find might return 1 but still have worked fine.
|
"""Generate tuples of the form (username, homedir) for all normal
|
||||||
return find(path, "-maxdepth", "3", _ok_code=[0,1])
|
users on this system.
|
||||||
|
|
||||||
def get_active_user_count():
|
"""
|
||||||
return int(wc(sort(cut(who(), "-d", " ", "-f1"), "-u"), "-l"))
|
return ((p.pw_name, p.pw_dir) for p in pwd.getpwall() if
|
||||||
|
p.pw_uid >= 1000 and
|
||||||
|
p.pw_shell != '/bin/false' and
|
||||||
|
p.pw_name not in SYSTEM_USERS)
|
||||||
|
|
||||||
def guarded_mtime(path):
|
def most_recent_within(path):
|
||||||
try:
|
"""Return the most recent timestamp among all files within path, 3
|
||||||
return getmtime(path.rstrip())
|
levels deep.
|
||||||
except Exception as _:
|
"""
|
||||||
return 0
|
return max(modified_times(path), maxdepth=3)
|
||||||
|
|
||||||
def modify_time(username):
|
def modified_times(path, maxdepth=None):
|
||||||
files_to_mtimes = partial(map, guarded_mtime)
|
"""Walk the directories in path, generating timestamps for all
|
||||||
return thread(username,
|
files.
|
||||||
username_to_html_path,
|
"""
|
||||||
bounded_find,
|
for root, dirs, files in os.walk(path):
|
||||||
files_to_mtimes,
|
if maxdepth and len(root[len(path):].split(os.sep)) == maxdepth:
|
||||||
list,
|
dirs.clear()
|
||||||
max)
|
for f in files:
|
||||||
|
try:
|
||||||
|
yield os.path.getmtime(os.path.join(root, f))
|
||||||
|
except FileNotFoundError:
|
||||||
|
pass
|
||||||
|
|
||||||
def sort_user_list(usernames):
|
def tdp_user(username, homedir):
|
||||||
return sorted(usernames, key=modify_time)
|
"""Given a unix username, and their home directory, return a TDP format
|
||||||
|
dict with information about that user.
|
||||||
|
|
||||||
def user_generator():
|
"""
|
||||||
ignore_system_users = lambda un: un not in SYSTEM_USERS
|
public_html = os.path.join(homedir, 'public_html')
|
||||||
return filter(ignore_system_users, listdir("/home"))
|
index_html = os.path.join(public_html, 'index.html')
|
||||||
|
if os.path.exists(index_html):
|
||||||
|
return {
|
||||||
|
'username': username,
|
||||||
|
'title': get_title(index_html),
|
||||||
|
'mtime': int(most_recent_within(public_html) * 1000),
|
||||||
|
# tilde.town extensions and backward compatibility
|
||||||
|
# FIXME: just shelling out to diff -q might be way faster than all
|
||||||
|
# these hashes.
|
||||||
|
'favicon': 'TODO',
|
||||||
|
'default': subprocess.call(
|
||||||
|
['diff', '-q', DEFAULT_HTML_FILENAME, index_html],
|
||||||
|
stdout=subprocess.DEVNULL) == 0,
|
||||||
|
}
|
||||||
|
else:
|
||||||
|
return {
|
||||||
|
'username': username,
|
||||||
|
'default': False
|
||||||
|
}
|
||||||
|
|
||||||
def get_user_data():
|
def tdp():
|
||||||
username_to_data = lambda u: {'username': u,
|
now = datetime.datetime.now()
|
||||||
'default': default_p(u),
|
users = [tdp_user(u, h) for u, h in get_users()]
|
||||||
'favicon':'TODO'}
|
|
||||||
live_p = lambda user: not user['default']
|
|
||||||
|
|
||||||
all_users = thread(user_generator(),
|
# TDP format data
|
||||||
sort_user_list,
|
data = {
|
||||||
reversed,
|
'name': 'tilde.town',
|
||||||
partial(map, username_to_data),
|
'url': 'http://tilde.town',
|
||||||
list)
|
'signup_url': 'http://goo.gl/forms/8IvQFTDjlo',
|
||||||
|
'want_users': True,
|
||||||
|
'admin_email': 'nks@lambdaphil.es',
|
||||||
|
'description': " ".join(l.strip() for l in """
|
||||||
|
an intentional digital community for creating and sharing works of
|
||||||
|
art, educating peers, and technological anachronism. we are a
|
||||||
|
completely non-commercial, donation supported, and committed to
|
||||||
|
rejecting false technological progress in favor of empathy and
|
||||||
|
sustainable computing.
|
||||||
|
""".splitlines()),
|
||||||
|
'user_count': len(users),
|
||||||
|
'users': users,
|
||||||
|
}
|
||||||
|
|
||||||
live_users = list(filter(live_p, all_users))
|
# tilde.town extensions and backward compatibility
|
||||||
|
data.update({
|
||||||
|
'active_user_count': active_user_count(),
|
||||||
|
'generated_at': now.strftime('%Y-%m-%d %H:%M:%S'),
|
||||||
|
'generated_at_msec': int(now.timestamp() * 1000),
|
||||||
|
'uptime': subprocess.check_output(['uptime', '-p'], universal_newlines=True),
|
||||||
|
})
|
||||||
|
# redundant entries we should drop after changing homepage template
|
||||||
|
data.update({
|
||||||
|
'all_users': data['users'],
|
||||||
|
'num_users': data['user_count'],
|
||||||
|
'live_users': [u for u in data['users'] if not u['default']],
|
||||||
|
'site_name': data['name'],
|
||||||
|
'site_url': data['url'],
|
||||||
|
})
|
||||||
|
data.update({
|
||||||
|
'num_live_users': len(data['live_users']),
|
||||||
|
})
|
||||||
|
|
||||||
active_user_count = get_active_user_count()
|
|
||||||
|
|
||||||
return {'all_users': all_users,
|
|
||||||
'num_users': len(all_users),
|
|
||||||
'num_live_users': len(live_users),
|
|
||||||
'active_user_count': active_user_count,
|
|
||||||
'live_users': live_users,}
|
|
||||||
|
|
||||||
def get_data():
|
|
||||||
user_data = get_user_data()
|
|
||||||
data = {'generated_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
|
|
||||||
'site_name': 'tilde.town',
|
|
||||||
'site_url': 'http://tilde.town',
|
|
||||||
'uptime': str(uptime('-p')).rstrip(),}
|
|
||||||
|
|
||||||
data.update(user_data)
|
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
def main():
    """Print the data built by tdp() to stdout as indented, key-sorted JSON."""
    report = tdp()
    print(json.dumps(report, indent=2, sort_keys=True))
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(json.dumps(get_data()))
|
raise SystemExit(main())
|
||||||
|
|
Reference in New Issue