#!/usr/bin/env python
"""Convert the Glossatory archive from an ActivityPub collection to gemini.

The script reads ``outbox.json`` from the archive directory, keeps every
``Create`` activity, takes the file referenced by its first attachment, and
writes a tree of gemtext index pages next to copies of those files::

    OUTDIR/index.gmi                 one link per year
    OUTDIR/YYYY/index.gmi            one link per month
    OUTDIR/YYYY/MM/index.gmi         one link per attached file
    OUTDIR/YYYY/MM/DD/<file name>
"""

# Recovered from apub2gmi.py as added in commit
# 4e3405727dfdfa3cf83f22cdff17cfa0c5f58d4f (Mike Lynch, 2024-05-04).

import json
import datetime
import re
from pathlib import Path
from shutil import copy

# Two-digit month number (as used for directory names) -> display name.
MNAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

ARCHIVE = "archive-20230604031441-05906d0df7f3f14777089c2fd7d0175a"
OUTDIR = "gemini"

# Written verbatim at the top of every generated index page.
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.

=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home

"""

# Attachment URLs are expected to carry this prefix; the remainder is the
# file's path relative to the archive directory.
URL_RE = re.compile(r"^/files\.weirder\.earth/(.*)$")

# Tried in order against an attachment's alt text; the first match wins.
# Group 1 becomes the definition, group 2 (when the pattern has one) the
# description.
NAME_RES = [
    re.compile(r"^(.*?)\.\s*(.*)$"),
    re.compile(r"^(.*?)\s*(The drawing.*)$"),
    re.compile(r"^A line drawing depicting (.*)$"),
    re.compile(r"^(.*?): (.*)$"),
]


class GlossatoryPost:
    """One archived post: its date parts, attached file and alt text.

    Attributes:
        year, month, day: date components as strings (used as path parts).
        file: path of the attached file inside the archive.
        fname: final path component of ``file``.
        title: the attachment's alt text, unmodified.
        defn, desc: parts of ``title`` as split by ``try_parse``.
    """

    def __init__(self, year, month, day, file, title):
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the attached file to ``root/year/month/day/fname``.

        The day directory is created when missing, and an existing target
        file is left untouched.
        """
        day_dir = Path(root) / self.year / self.month / self.day
        day_dir.mkdir(parents=True, exist_ok=True)
        target = day_dir / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Split ``title`` into ``defn`` and ``desc`` using ``NAME_RES``.

        When no pattern matches, a message is printed and the whole title
        is stored in ``defn``.
        """
        for pattern in NAME_RES:
            m = pattern.match(self.title)
            if m:
                self.defn = m.group(1)
                # Some patterns capture only the definition.
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title


def process_post(obj, archive=ARCHIVE):
    """Build a ``GlossatoryPost`` from the ``object`` of a Create activity.

    Args:
        obj: mapping with a ``published`` timestamp beginning ``YYYY-MM-DD``
            and a non-empty ``attachment`` list whose first entry has
            ``url`` and ``name`` keys.
        archive: archive directory the attachment path is resolved against.

    Raises:
        ValueError: if the date is malformed or the URL does not match
            ``URL_RE``.
        KeyError, IndexError: if an expected key or attachment is missing.
    """
    date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    attachment = obj["attachment"][0]
    url = attachment["url"]
    m = URL_RE.match(url)
    if m is None:
        raise ValueError(f"Couldn't match url {url}")
    return GlossatoryPost(
        f"{date.year}",
        f"{date.month:02}",
        f"{date.day:02}",
        Path(archive) / m.group(1),
        attachment["name"],
    )


def ensure_dir(gmdir):
    """Create directory ``gmdir`` (and any missing parents) if needed."""
    gmdir.mkdir(parents=True, exist_ok=True)


def write_gemfile(gmdir, title, items):
    """Write ``gmdir/index.gmi``: ``HEADER``, a heading, then link lines.

    Args:
        gmdir: ``Path`` of the directory to write into; created if missing.
        title: text of the ``#`` heading.
        items: iterable of ``(link, text)`` pairs, one link line each.
    """
    ensure_dir(gmdir)
    # Encoding is explicit so the output does not depend on the platform
    # locale.
    with open(gmdir / "index.gmi", "w", encoding="utf-8") as gfh:
        gfh.write(HEADER)
        gfh.write(f"# {title}\n\n")
        gfh.writelines(f"=> {link} {text}\n" for link, text in items)


def collect_posts(items, archive=ARCHIVE):
    """Group the posts found in outbox ``items`` by year, month and day.

    Only items whose ``type`` is ``"Create"`` are considered. An item that
    cannot be converted is reported on stdout and skipped.

    Returns:
        Nested dicts ``{year: {month: {day: [GlossatoryPost, ...]}}}`` in
        the order the dates are first encountered.
    """
    posts = {}
    for item in items:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(item["object"], archive)
        except (LookupError, TypeError, ValueError) as e:
            print(f"Processing failed: {item.get('id')}: {e}")
            continue
        by_month = posts.setdefault(post.year, {})
        by_day = by_month.setdefault(post.month, {})
        by_day.setdefault(post.day, []).append(post)
    return posts


def write_archive(posts, outdir=OUTDIR):
    """Write every index page and copy every attached file under ``outdir``.

    Args:
        posts: nested dicts as returned by ``collect_posts``.
        outdir: root directory of the generated site.
    """
    root = Path(outdir)
    write_gemfile(root, "Glossatory", [(f"{year}/", year) for year in posts])

    for year, months in posts.items():
        ydir = root / year
        write_gemfile(
            ydir, year, [(f"{month}/", MNAMES[month]) for month in months]
        )
        for month, days in months.items():
            mdir = ydir / month
            # Each month page starts with a link back up to its year.
            links = [(f"/glossatory/{year}/", year)]
            for day, day_posts in days.items():
                for post in day_posts:
                    post.copy_image(root)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, f"{MNAMES[month]} {year}", links)


def main(archive=ARCHIVE, outdir=OUTDIR):
    """Convert the archive in ``archive`` into a gemini site in ``outdir``."""
    # The file is closed as soon as it is parsed rather than being held
    # open for the whole conversion.
    with open(Path(archive) / "outbox.json", "r", encoding="utf-8") as fh:
        outbox = json.load(fh)
    write_archive(collect_posts(outbox["orderedItems"], archive), outdir)


if __name__ == "__main__":
    main()