apub2gmi/apub2gmi.py

173 lines
4.2 KiB
Python
Executable File

#!/usr/bin/env python
# Convert the Glossatory archive from an ActivityPub collection (outbox.json
# plus its media files) into a tree of Gemini index pages.
import argparse
import datetime
import json
import re
from pathlib import Path
from shutil import copy
# Zero-padded month number ("01".."12") -> English month name.  The keys have
# the same form as the month strings used for directory names.
MNAMES = {
    f"{number:02}": name
    for number, name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
class MediaPost:
    """One image post: where its file lives, when it was posted, and its text.

    On construction the title is matched against each pattern in ``name_res``
    (see ``try_parse``) to split it into a definition and an optional
    description.
    """

    def __init__(self, name_res, year, month, day, file, title):
        """Store the post's fields and parse ``title``.

        Args:
            name_res: Iterable of compiled regular expressions tried in order
                against ``title``.
            year: Year as a string; used as a directory name.
            month: Month as a string; used as a directory name.
            day: Day as a string; used as a directory name.
            file: Path of the source image file.
            title: Text to parse into ``defn`` / ``desc``.
        """
        self.name_res = name_res
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        # Bare file name, used both for the copied image and in links.
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/`` unless it is already there.

        Args:
            root: Top-level output directory.
        """
        target_dir = Path(root) / self.year / self.month / self.day
        # Create the dated directory here so the copy does not depend on the
        # caller having made it first.
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Fill ``defn`` and ``desc`` from the first pattern matching ``title``.

        Group 1 of the matching pattern becomes ``defn``.  If the pattern has
        exactly two groups, group 2 becomes ``desc``.  When no pattern
        matches, a message is printed and the whole title is used as ``defn``.
        """
        # Named ``pattern`` rather than ``re`` so the ``re`` module is not
        # shadowed inside this method.
        for pattern in self.name_res:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                if len(m.groups()) == 2:
                    # An optional group that took no part in the match is
                    # None; keep ``desc`` a string in that case.
                    self.desc = m.group(2) or ""
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title
def process_post(cf, archive, obj):
    """Build a MediaPost from one object in the outbox.

    Args:
        cf: Config mapping; ``cf["url_re"]`` is matched against the attachment
            URL and ``cf["title_res"]`` is handed to MediaPost.
        archive: Directory the media path (group 1 of ``url_re``) is joined to.
        obj: Mapping with a ``"published"`` timestamp starting ``YYYY-MM-DD``
            and an ``"attachment"`` list; only the first attachment is used.

    Returns:
        A MediaPost whose title is the first attachment's ``"name"``.

    Raises:
        ValueError: If the attachment URL does not match ``cf["url_re"]`` (or
            the date prefix cannot be parsed).
    """
    published = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    year_str = str(published.year)
    month_str = format(published.month, "02")
    day_str = format(published.day, "02")

    first_attachment = obj["attachment"][0]
    url = first_attachment["url"]
    matched = cf["url_re"].match(url)
    if matched is None:
        raise ValueError(f"Couldn't match url {url}")

    image_path = Path(archive) / matched.group(1)
    alt_text = first_attachment["name"]
    return MediaPost(
        cf["title_res"], year_str, month_str, day_str, image_path, alt_text
    )
def ensure_dir(gmdir):
    """Create directory ``gmdir`` and any missing parents; no-op if it exists.

    Args:
        gmdir: Directory path (``Path`` or string).

    Raises:
        FileExistsError: If ``gmdir`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate is_dir() check, so there is no
    # window between checking and creating.
    Path(gmdir).mkdir(parents=True, exist_ok=True)
def load_colophon(cfile):
    """Return the full text of ``cfile``, or None when no file is given.

    Args:
        cfile: Path of the text file, or a falsy value (None / "") for none.

    Returns:
        The file's contents as one string, or None.
    """
    if not cfile:
        return None
    # Decode as UTF-8 explicitly instead of using the platform's locale
    # default, so the text reads the same on every system.
    with open(cfile, "r", encoding="utf-8") as cfh:
        return cfh.read()
def load_config(config):
    """Load the JSON config file and compile its regular expressions.

    Args:
        config: Path of a JSON file containing ``"url_re"`` (one pattern) and
            ``"title_res"`` (a list of patterns).

    Returns:
        The parsed mapping, with ``"url_re"`` replaced by a compiled pattern
        and ``"title_res"`` by a list of compiled patterns.

    Raises:
        KeyError: If either key is missing from the file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(config, "r", encoding="utf-8") as cfh:
        cf = json.load(cfh)
    cf["url_re"] = re.compile(cf["url_re"])
    cf["title_res"] = [re.compile(pattern) for pattern in cf["title_res"]]
    return cf
def write_gemfile(gmdir, colophon, title, items):
    """Write ``gmdir/index.gmi``: optional colophon, a heading, then links.

    Args:
        gmdir: Directory for the index page; created if missing.
        colophon: Text placed before the heading, or a falsy value for none.
        title: Heading text (written as a ``# `` line).
        items: Iterable of ``(link, text)`` pairs, one ``=>`` link line each.
    """
    gmdir = Path(gmdir)
    gmdir.mkdir(parents=True, exist_ok=True)

    lines = []
    if colophon:
        lines.append(f"{colophon}\n\n")
    lines.append(f"# {title}\n\n")
    for link, text in items:
        # A link line must stay on one physical line, so fold any line breaks
        # in the label (e.g. multi-line alt text) into spaces.
        label = " ".join(f"{text}".splitlines())
        lines.append(f"=> {link} {label}\n")

    # Write UTF-8 explicitly rather than the platform's locale encoding.
    with open(gmdir / "index.gmi", "w", encoding="utf-8") as gfh:
        gfh.writelines(lines)
def apub2gmi(cf, archive, output, colophon):
    """Convert the archive's outbox into a year/month tree of index pages.

    Reads ``outbox.json`` from ``archive``, turns every ``"Create"`` item into
    a post via ``process_post`` and groups the posts by year, month and day.
    It then writes:

    * a root index in ``output`` titled "Glossatory", linking to each year;
    * one index per year, linking to each month;
    * one index per month, linking back to ``/glossatory/<year>/`` and to
      every image of that month, with the images copied to
      ``output/<year>/<month>/<day>/``.

    Items that cannot be processed are reported on stdout and skipped.

    Args:
        cf: Config mapping as returned by ``load_config``.
        archive: Directory containing ``outbox.json`` and the media files.
        output: Root directory of the generated pages.
        colophon: Text for the top of every index page, or None.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(Path(archive) / "outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)

    # posts[year][month][day] -> list of posts, in outbox order.
    posts = {}
    for item in js["orderedItems"]:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(cf, archive, item["object"])
        except Exception as e:
            # Best effort: report the bad item and carry on with the rest.
            # .get() keeps a missing "id" from raising inside the handler.
            print(f"Processing failed: {item.get('id', '<no id>')}: {e}")
            continue
        by_month = posts.setdefault(post.year, {})
        by_day = by_month.setdefault(post.month, {})
        by_day.setdefault(post.day, []).append(post)

    out_root = Path(output)
    years = [(f"{year}/", year) for year in posts]
    write_gemfile(out_root, colophon, "Glossatory", years)

    for year, by_month in posts.items():
        ydir = out_root / year
        months = [(f"{month}/", MNAMES[month]) for month in by_month]
        write_gemfile(ydir, colophon, year, months)

        for month, by_day in by_month.items():
            mname = MNAMES[month]
            mdir = ydir / month
            # First link on each month page points back to the year page.
            # NOTE(review): the "/glossatory/" prefix is hard-coded.
            links = [(f"/glossatory/{year}/", year)]
            for day, day_posts in by_day.items():
                (mdir / day).mkdir(parents=True, exist_ok=True)
                for post in day_posts:
                    post.copy_image(output)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, colophon, f"{mname} {year}", links)
if __name__ == "__main__":
    # One row per command-line option: short flag, long flag, required?, help.
    option_table = (
        ("-a", "--archive", True, "ActivityPub archive"),
        ("-o", "--output", True, "Output directory"),
        ("-c", "--config", True, "Config file"),
        (
            "-t",
            "--text",
            False,
            "File with text to be included at the top of each index page",
        ),
    )
    parser = argparse.ArgumentParser()
    for short_flag, long_flag, is_required, help_text in option_table:
        parser.add_argument(
            short_flag,
            long_flag,
            required=is_required,
            type=str,
            help=help_text,
        )
    cli = parser.parse_args()
    # Arguments are evaluated left to right: config is loaded before the
    # optional header text, then the conversion runs.
    apub2gmi(
        load_config(cli.config),
        cli.archive,
        cli.output,
        load_colophon(cli.text),
    )