173 lines
4.2 KiB
Python
Executable File
173 lines
4.2 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
# convert the Glossatory archive from an ActivityPub collection to
|
|
# gemini
|
|
|
|
import argparse
|
|
import datetime
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from shutil import copy
|
|
|
|
# English month names keyed by zero-padded, two-digit month number
# ("01" .. "12").
MNAMES = {
    f"{number:02}": name
    for number, name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
|
|
|
|
|
|
|
|
class MediaPost:
    """One media attachment from the archive, with its date and parsed title.

    Attributes:
        name_res: compiled patterns tried, in order, against ``title``.
        year, month, day: date parts; they are joined as path components
            when the image is copied, so they must be strings.
        file: path of the source image file.
        fname: final path component of ``file``.
        title: the unparsed title text.
        defn: first capture group of the matching pattern, or the whole
            title when no pattern matches.
        desc: second capture group when the matching pattern has exactly
            two groups, otherwise ``""``.
    """

    def __init__(self, name_res, year, month, day, file, title):
        """Store the post's details and parse ``title`` immediately."""
        self.name_res = name_res
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/fname`` unless it exists.

        The day directory is created when missing, so the method does not
        rely on the caller having built the directory tree beforehand.
        """
        target_dir = Path(root) / self.year / self.month / self.day
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Fill ``defn`` and ``desc`` from the first pattern matching ``title``.

        If nothing matches, a message is printed and ``defn`` falls back to
        the whole title; ``desc`` is left as it was.
        """
        # The loop variable is deliberately not called ``re`` so that it
        # does not shadow the ``re`` module inside this method.
        for pattern in self.name_res:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title
|
|
|
|
|
|
def process_post(cf, archive, obj):
    """Build a MediaPost from one post object of the outbox.

    The date comes from the first ten characters of ``obj["published"]``
    (``YYYY-MM-DD``). Only the first attachment is used: its ``url`` must
    match ``cf["url_re"]``, whose first group is taken as the file's path
    relative to ``archive``, and its ``name`` becomes the post's title.

    Raises:
        ValueError: if the attachment URL does not match ``cf["url_re"]``
            (or the date cannot be parsed).
    """
    published = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    media = obj["attachment"][0]
    media_url = media["url"]

    matched = cf["url_re"].match(media_url)
    if matched is None:
        raise ValueError(f"Couldn't match url {media_url}")

    local_file = Path(archive) / matched.group(1)
    return MediaPost(
        cf["title_res"],
        str(published.year),
        f"{published.month:02}",
        f"{published.day:02}",
        local_file,
        media["name"],
    )
|
|
|
|
|
|
def ensure_dir(gmdir):
    """Create the directory ``gmdir`` (a ``Path``), with parents, if missing.

    ``exist_ok=True`` makes this a single race-free call rather than a
    separate existence check followed by ``mkdir``. If the path exists but
    is not a directory, ``FileExistsError`` is still raised.
    """
    gmdir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
def load_colophon(cfile):
    """Return the full text of ``cfile``, or ``None`` when no file is given.

    The file is read as UTF-8 rather than in the platform's locale
    encoding, so the result does not depend on the machine the script runs
    on.
    """
    if not cfile:
        return None
    with open(cfile, "r", encoding="utf-8") as cfh:
        return cfh.read()
|
|
|
|
|
|
def load_config(config):
    """Load the JSON config file and compile the regexes it contains.

    ``url_re`` (a pattern string) and ``title_res`` (a list of pattern
    strings) are replaced in place by their compiled forms; every other key
    is returned untouched. The file is read as UTF-8, the standard encoding
    for JSON, instead of the platform's locale encoding.

    Raises:
        KeyError: if ``url_re`` or ``title_res`` is missing.
    """
    with open(config, "r", encoding="utf-8") as cfh:
        cf = json.load(cfh)
    cf["url_re"] = re.compile(cf["url_re"])
    cf["title_res"] = [re.compile(pattern) for pattern in cf["title_res"]]
    return cf
|
|
|
|
|
|
def write_gemfile(gmdir, colophon, title, items):
    """Write ``gmdir/index.gmi``: optional colophon, a heading, then links.

    Args:
        gmdir: directory (a ``Path``) to write into; created if missing.
        colophon: text placed at the top of the page, followed by a blank
            line; skipped when empty or ``None``.
        title: text of the ``#`` heading.
        items: iterable of ``(link, text)`` pairs, one ``=>`` line each.

    The file is written as UTF-8, the default charset for text/gemini,
    rather than in the platform's locale encoding.
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    with open(gmi, "w", encoding="utf-8") as gfh:
        if colophon:
            gfh.write(colophon)
            gfh.write("\n\n")
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            # Gemtext is line-oriented: a link is exactly one line, so any
            # line breaks in the label (e.g. multi-line alt text) are
            # folded into spaces to keep the label inside the link.
            label = " ".join(str(text).splitlines())
            gfh.write(f"=> {link} {label}\n")
|
|
|
|
|
|
def apub2gmi(cf, archive, output, colophon):
    """Convert the archive's outbox into a tree of gemtext index pages.

    Reads ``<archive>/outbox.json``, turns every ``Create`` item into a
    MediaPost and groups the posts by year, month and day in the order they
    appear in the outbox. It then writes:

    * ``output/index.gmi`` linking to each year,
    * ``output/<year>/index.gmi`` linking to each month,
    * ``output/<year>/<month>/index.gmi`` linking to each image, with the
      images copied to ``output/<year>/<month>/<day>/``.

    Args:
        cf: config dict as returned by ``load_config``.
        archive: path of the unpacked archive directory.
        output: root directory of the generated files.
        colophon: text placed at the top of every index page, or ``None``.

    Items that cannot be processed are reported on stdout and skipped.
    """
    # The outbox is JSON, so read it as UTF-8 regardless of the locale.
    with open(f"{archive}/outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)

    # posts[year][month][day] -> list of MediaPost, in outbox order.
    posts = {}
    for item in js["orderedItems"]:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(cf, archive, item["object"])
        except Exception as e:
            # Best effort: one bad item must not abort the whole conversion.
            i = item["id"]
            print(f"Processing failed: {i}: {e}")
            continue
        days = posts.setdefault(post.year, {}).setdefault(post.month, {})
        days.setdefault(post.day, []).append(post)

    outdir = Path(output)
    years = [(f"{year}/", year) for year in posts]
    write_gemfile(outdir, colophon, "Glossatory", years)

    for year, by_month in posts.items():
        ydir = outdir / year
        months = [(f"{month}/", MNAMES[month]) for month in by_month]
        write_gemfile(ydir, colophon, year, months)
        for month, by_day in by_month.items():
            mname = MNAMES[month]
            mdir = ydir / month
            # NOTE(review): the back-link to the year page is an absolute
            # path with a fixed "/glossatory/" prefix, independent of
            # ``output``.
            links = [(f"/glossatory/{year}/", year)]
            for day, day_posts in by_day.items():
                (mdir / day).mkdir(parents=True, exist_ok=True)
                for post in day_posts:
                    post.copy_image(output)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, colophon, f"{mname} {year}", links)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the config and the
    # optional page-top text, then run the conversion.
    parser = argparse.ArgumentParser()
    required_options = (
        ('-a', '--archive', "ActivityPub archive"),
        ('-o', '--output', "Output directory"),
        ('-c', '--config', "Config file"),
    )
    for short_flag, long_flag, help_text in required_options:
        parser.add_argument(
            short_flag, long_flag, required=True, type=str, help=help_text
        )
    parser.add_argument(
        '-t', '--text', required=False, type=str,
        help="File with text to be included at the top of each index page"
    )
    args = parser.parse_args()

    cf = load_config(args.config)
    colophon = load_colophon(args.text)
    apub2gmi(cf, args.archive, args.output, colophon)
|