#!/usr/bin/env python

# convert the Glossatory archive from an ActivityPub collection to
# gemini

import argparse
import datetime
import json
import re
from pathlib import Path
from shutil import copy

# Zero-padded month number -> English month name, used for index titles.
MNAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

# Gemtext banner for the archive (currently unused by the conversion code;
# the per-page preamble comes from the --colophon file instead).
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.

=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"

=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home
"""

# Attachment URLs in the outbox look like "/weirderearth/<relative path>".
URL_RE = re.compile(r"^/weirderearth/(.*)$")

# Patterns for splitting an image's alt text into (definition, description),
# tried in order; the third pattern captures only a definition.
NAME_RES = [
    re.compile(r"^(.*?)\.\s*(.*)$"),
    re.compile(r"^(.*?)\s*(The drawing.*)$"),
    re.compile(r"^A line drawing depicting (.*)$"),
    re.compile(r"^(.*?): (.*)$"),
]


class MediaPost:
    """A single dated image post pulled out of the ActivityPub outbox."""

    def __init__(self, year, month, day, file, title):
        self.year = year        # four-digit year as a string
        self.month = month      # zero-padded month as a string
        self.day = day          # zero-padded day as a string
        self.file = file        # path to the source image in the archive
        self.fname = Path(file).name
        self.title = title      # raw alt text from the attachment
        self.defn = ""          # parsed definition (headword)
        self.desc = ""          # parsed description, if any
        self.try_parse()

    def copy_image(self, root):
        """Copy this post's image into root/year/month/day/ unless it already exists."""
        d = Path(root) / self.year / self.month / self.day
        target = d / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Split the alt-text title into self.defn / self.desc.

        Tries each pattern in NAME_RES in order; on failure, reports the
        title and falls back to using the whole title as the definition.
        """
        # BUG FIX: the loop variable was previously named `re`, shadowing
        # the imported `re` module inside this method.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                # The "A line drawing depicting ..." pattern has only one group.
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title


def process_post(archive, obj):
    """Build a MediaPost from an ActivityPub Create object.

    archive -- root directory of the downloaded ActivityPub archive
    obj     -- the "object" member of a Create activity

    Raises ValueError when the first attachment's URL does not match URL_RE.
    """
    date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    year = f"{date.year}"
    month = f"{date.month:02}"
    day = f"{date.day:02}"
    attachment = obj["attachment"][0]
    url = attachment["url"]
    if m := URL_RE.match(url):
        file = Path(archive) / m.group(1)
    else:
        raise ValueError(f"Couldn't match url {url}")
    alt = attachment["name"]
    return MediaPost(year, month, day, file, alt)


def ensure_dir(gmdir):
    """Create gmdir (and parents) if it does not already exist."""
    if not gmdir.is_dir():
        gmdir.mkdir(parents=True)


def load_colophon(cfile):
    """Return the contents of cfile as one string, or None when cfile is falsy."""
    if cfile:
        with open(cfile, "r") as cfh:
            colophon = cfh.readlines()
        return "".join(colophon)
    return None


def write_gemfile(gmdir, colophon, title, items):
    """Write gmdir/index.gmi: optional colophon, a title, then gemtext links.

    items -- iterable of (link, text) pairs rendered as "=> link text" lines
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    with open(gmi, "w") as gfh:
        if colophon:
            gfh.write(colophon)
            gfh.write("\n\n")
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            gfh.write(f"=> {link} {text}\n")


def apub2gmi(archive, output, colophon):
    """Convert the archive's outbox.json into a year/month tree of gemini pages.

    Copies each post's image into output/year/month/day/ and writes index.gmi
    files at the top level, per year and per month.
    """
    with open(f"{archive}/outbox.json", "r") as fh:
        js = json.load(fh)

    # posts[year][month][day] -> list of MediaPost
    posts = {}
    for item in js["orderedItems"]:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(archive, item["object"])
            bymonth = posts.setdefault(post.year, {})
            byday = bymonth.setdefault(post.month, {})
            byday.setdefault(post.day, []).append(post)
        except Exception as e:
            # Best-effort conversion: report the failing item and keep going.
            i = item["id"]
            print(f"Processing failed: {i}: {e}")

    years = [(f"{year}/", year) for year in posts]
    write_gemfile(Path(output), colophon, "Glossatory", years)

    for year in posts:
        ydir = Path(output) / year
        months = [(f"{month}/", MNAMES[month]) for month in posts[year]]
        write_gemfile(ydir, colophon, year, months)
        for month in posts[year]:
            mname = MNAMES[month]
            mdir = ydir / month
            for day in posts[year][month]:
                ddir = mdir / day
                ddir.mkdir(parents=True, exist_ok=True)
                for post in posts[year][month][day]:
                    post.copy_image(output)
            # Month index: a link back to the year page, then one link per post.
            links = [(f"/glossatory/{year}/", year)]
            for day in posts[year][month]:
                for post in posts[year][month][day]:
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, colophon, f"{mname} {year}", links)


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '-a', '--archive', required=True, type=str,
        help="ActivityPub archive"
    )
    ap.add_argument(
        '-o', '--output', required=True, type=str,
        help="Output directory"
    )
    ap.add_argument(
        '-c', '--colophon', required=False, type=str,
        help="File with text to be included at the top of each index page"
    )
    args = ap.parse_args()
    colophon = load_colophon(args.colophon)
    apub2gmi(args.archive, args.output, colophon)