#!/usr/bin/env python

# Convert the Glossatory archive from an ActivityPub collection to gemini.
#
# Reads ARCHIVE/outbox.json, copies every post's first attached image into
# OUTDIR/<year>/<month>/<day>/ and writes index.gmi files for the archive
# root, each year and each month.

import json
import datetime
import re
from pathlib import Path
from shutil import copy

# Month directory name ("01".."12") -> heading shown in the gemtext indexes.
MNAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

ARCHIVE = "archive-20230604031441-05906d0df7f3f14777089c2fd7d0175a"

OUTDIR = "gemini"

# Written at the top of every generated index.gmi.  Each "=>" link has to
# start its own line to be recognised as a gemtext link.
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.

=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"

=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home

"""

# Attachment URLs are matched against this; group 1 is the path of the media
# file relative to the archive directory.
URL_RE = re.compile(r"^/files\.weirder\.earth/(.*)$")

# Tried in order against the image alt text; the first match wins.  Group 1
# is the definition, the optional group 2 the description of the drawing.
NAME_RES = [
    re.compile(r"^(.*?)\.\s*(.*)$"),
    re.compile(r"^(.*?)\s*(The drawing.*)$"),
    re.compile(r"^A line drawing depicting (.*)$"),
    re.compile(r"^(.*?): (.*)$"),
]


class GlossatoryPost:
    """One archived post: its date, its image file and its alt text.

    Attributes:
        year, month, day: date components as strings (month and day are
            zero-padded to two digits by process_post).
        file: path of the image inside the archive.
        fname: base name of ``file``.
        title: the full alt text of the image.
        defn: definition part parsed from ``title`` (the whole title when no
            pattern in NAME_RES matches).
        desc: description part parsed from ``title``, or "" when absent.
    """

    def __init__(self, year, month, day, file, title):
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/`` unless already there.

        The destination directory is created if needed so the method does
        not depend on the caller having prepared it.
        """
        d = Path(root) / self.year / self.month / self.day
        d.mkdir(parents=True, exist_ok=True)
        target = d / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Split ``title`` into ``defn`` and ``desc`` using NAME_RES.

        When no pattern matches, a message is printed and ``defn`` is set to
        the whole title.
        """
        # The loop variable is deliberately not called "re": that would
        # shadow the regex module inside this method.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title


def process_post(obj):
    """Build a GlossatoryPost from the ``object`` of a Create activity.

    Only the first attachment is used.

    Raises:
        ValueError: if the attachment URL does not match URL_RE.
        KeyError, IndexError: if the expected fields or the attachment are
            missing from ``obj``.
    """
    # Only the leading YYYY-MM-DD part of the timestamp is parsed.
    date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    year = f"{date.year}"
    month = f"{date.month:02}"
    day = f"{date.day:02}"
    attachment = obj["attachment"][0]
    url = attachment["url"]
    if m := URL_RE.match(url):
        file = Path(ARCHIVE) / m.group(1)
    else:
        raise ValueError(f"Couldn't match url {url}")
    alt = attachment["name"]
    return GlossatoryPost(year, month, day, file, alt)


def ensure_dir(gmdir):
    """Create directory ``gmdir`` (a Path) and any missing parents."""
    gmdir.mkdir(parents=True, exist_ok=True)


def write_gemfile(gmdir, title, items):
    """Write ``gmdir/index.gmi``: HEADER, a heading, then one link per item.

    Args:
        gmdir: Path of the directory to write into; created if missing.
        title: text of the level-one heading.
        items: iterable of ``(link, text)`` pairs.
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    # Explicit UTF-8 so the output does not depend on the platform's locale.
    with open(gmi, "w", encoding="utf-8") as gfh:
        gfh.write(HEADER)
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            # A gemtext link is a single line, so runs of whitespace
            # (including newlines in alt text) are collapsed to one space.
            label = " ".join(text.split())
            gfh.write(f"=> {link} {label}\n")


def collect_posts(items):
    """Group the posts of all Create activities by year, month and day.

    Items that cannot be processed are reported on stdout and skipped so a
    single malformed entry does not abort the whole conversion.

    Returns:
        A nested dict ``posts[year][month][day] -> list of GlossatoryPost``,
        in the order the items were encountered.
    """
    posts = {}
    for item in items:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(item["object"])
        except Exception as e:  # deliberate best-effort: log and carry on
            ident = item["id"]
            print(f"Processing failed: {ident}: {e}")
            continue
        by_month = posts.setdefault(post.year, {})
        by_day = by_month.setdefault(post.month, {})
        by_day.setdefault(post.day, []).append(post)
    return posts


def main():
    """Read the outbox, copy the images and write all index files."""
    with open(f"{ARCHIVE}/outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)

    posts = collect_posts(js["orderedItems"])

    years = [(f"{year}/", year) for year in posts]
    write_gemfile(Path(OUTDIR), "Glossatory", years)

    for year, by_month in posts.items():
        ydir = Path(OUTDIR) / year
        months = [(f"{month}/", MNAMES[month]) for month in by_month]
        write_gemfile(ydir, year, months)
        for month, by_day in by_month.items():
            mname = MNAMES[month]
            mdir = ydir / month
            # Every month page starts with a link back up to its year.
            links = [(f"/glossatory/{year}/", year)]
            for day, day_posts in by_day.items():
                for post in day_posts:
                    post.copy_image(OUTDIR)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, f"{mname} {year}", links)


if __name__ == "__main__":
    main()