193 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/env python
 | |
| 
 | |
| # convert the Glossatory archive from an ActivityPub collection to 
 | |
| # gemini
 | |
| 
 | |
| import argparse
 | |
| import datetime
 | |
| import json
 | |
| import re
 | |
| from pathlib import Path
 | |
| from shutil import copy
 | |
| import sys
 | |
| 
 | |
# Maps a zero-padded month number ("01" .. "12") to its English month name.
MNAMES = dict(
	(f"{number:02}", name)
	for number, name in enumerate(
		(
			"January",
			"February",
			"March",
			"April",
			"May",
			"June",
			"July",
			"August",
			"September",
			"October",
			"November",
			"December",
		),
		start=1,
	)
)
 | |
| 
 | |
| 
 | |
| 
 | |
class MediaPost:
	"""One image post, filed under the date it was published.

	Attributes:
		year, month, day: date components, used verbatim as directory
			names when the image is copied.
		file: path of the source image file.
		fname: final path component of ``file``.
		title: text describing the image; the value passed in, unless
			one of ``title_res`` matched it (see ``try_parse_title``).
		title_res: optional list of compiled patterns tried against
			``title``.
		defn, desc: initialised to empty strings; nothing in this class
			assigns them afterwards.
	"""

	def __init__(self, year, month, day, file, title, title_res=None):
		self.year = year
		self.month = month
		self.day = day
		self.file = file
		self.fname = Path(file).name
		self.title = title
		self.title_res = title_res
		self.defn = ""
		self.desc = ""
		if self.title_res:
			self.try_parse_title()

	def __str__(self):
		return f"{self.year}-{self.month}-{self.day}: {self.file}"

	def copy_image(self, root):
		"""Copy the image to ``root/year/month/day/fname`` unless it is already there.

		A missing source image is reported on stderr rather than raised.
		"""
		target_dir = Path(root) / self.year / self.month / self.day
		target = target_dir / self.fname
		if target.exists():
			return
		# Create the dated directory first: otherwise a missing directory
		# raises FileNotFoundError too and would be misreported below as a
		# missing image.
		target_dir.mkdir(parents=True, exist_ok=True)
		try:
			copy(self.file, target)
		except FileNotFoundError:
			print(f"Image file missing: {self}", file=sys.stderr)

	def try_parse_title(self):
		"""Replace ``title`` using the first pattern in ``title_res`` that matches it.

		The captured groups of the match are joined with single spaces.
		Groups that took no part in the match are None and are skipped,
		since they cannot be joined. When no pattern matches, the title
		is left unchanged and a message is printed to stderr.
		"""
		# Named `pattern` rather than `re` so the `re` module is not shadowed.
		for pattern in self.title_res:
			if m := pattern.match(self.title):
				self.title = ' '.join(g for g in m.groups() if g is not None)
				return
		print(f"{self.file} Couldn't match alt text {self.title}", file=sys.stderr)
 | |
| 
 | |
| 
 | |
def process_post(cf, archive, obj, debug=False):
	"""Build a MediaPost from one status object of the outbox.

	Args:
		cf: config mapping; ``cf["url_re"]`` is a compiled pattern whose
			first group is joined onto ``archive`` to locate the image
			file, and the optional ``cf["title_res"]`` is handed on to
			MediaPost.
		archive: root directory of the archive.
		obj: the status object; ``published``, ``attachment`` and (when
			there are several attachments) ``id`` are read from it.
		debug: when true, echo the object to stderr first.

	Returns:
		A MediaPost for the first attachment of the status.

	Raises:
		ValueError: the status has no attachments, or the attachment URL
			does not match ``cf["url_re"]``.
	"""
	if debug:
		print(f"Processing {obj}", file=sys.stderr)

	# Only the leading YYYY-MM-DD part of the timestamp is used.
	published = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")

	if "attachment" not in obj or len(obj["attachment"]) == 0:
		raise ValueError('No attachments on this status')

	attachments = obj["attachment"]
	n = len(attachments)
	if n > 1:
		status_id = obj["id"]
		print(f"Warning: only one media item copied from post {status_id} which has {n}", file=sys.stderr)

	first = attachments[0]
	url = first["url"]
	matched = cf["url_re"].match(url)
	if not matched:
		raise ValueError(f"Couldn't match url {url}")
	file = Path(archive) / matched.group(1)
	alt = first["name"]

	return MediaPost(
		str(published.year),
		f"{published.month:02}",
		f"{published.day:02}",
		file,
		alt,
		cf.get("title_res"),
	)
 | |
| 
 | |
| 
 | |
def ensure_dir(gmdir):
	"""Make sure the directory ``gmdir`` exists, creating missing parents.

	Args:
		gmdir: directory to create, as a ``pathlib.Path`` or a string.

	Raises:
		FileExistsError: ``gmdir`` exists but is not a directory.
	"""
	# exist_ok closes the gap between checking for the directory and
	# creating it, during which another process could create it first.
	Path(gmdir).mkdir(parents=True, exist_ok=True)
 | |
| 
 | |
| 
 | |
def load_colophon(cfile):
	"""Return the whole text of the file ``cfile``, or None when no file name is given."""
	if not cfile:
		return None
	with open(cfile, "r") as handle:
		return handle.read()
 | |
| 
 | |
| 
 | |
def load_config(config):
	"""Read the JSON config file and compile the regular expressions in it.

	``url_re`` is required and is replaced by its compiled pattern; when
	``title_res`` is present, each entry of that list is compiled too.
	Returns the resulting dict.
	"""
	with open(config, "r") as handle:
		settings = json.load(handle)
	settings["url_re"] = re.compile(settings["url_re"])
	if "title_res" in settings:
		settings["title_res"] = list(map(re.compile, settings["title_res"]))
	return settings
 | |
| 
 | |
| 
 | |
def write_gemfile(gmdir, colophon, title, items):
	"""Write ``index.gmi`` in ``gmdir``: optional colophon, a heading, then links.

	Args:
		gmdir: directory (``pathlib.Path``) for the index file; created
			if it does not exist.
		colophon: text written before the heading, followed by two
			newlines; skipped when empty or None.
		title: text of the level-one heading.
		items: iterable of ``(link, text)`` pairs, written in order, one
			gemtext link line each.
	"""
	ensure_dir(gmdir)
	gmi = gmdir / "index.gmi"
	with open(gmi, "w") as gfh:
		if colophon:
			gfh.write(colophon)
			gfh.write("\n\n")
		gfh.write(f"# {title}\n\n")
		for link, text in items:
			# A gemtext link has to fit on one line, so line breaks in the
			# label (multi-line alt text, for instance) are folded into
			# spaces instead of spilling into the lines that follow.
			label = " ".join(str(text).splitlines())
			gfh.write(f"=> {link} {label}\n")
 | |
| 
 | |
| 
 | |
def apub2gmi(cf, archive, output, colophon, debug=False):
	"""Convert the outbox of an ActivityPub archive into a tree of Gemini indexes.

	Reads ``outbox.json`` from ``archive``, turns every "Create" item into
	a MediaPost and groups the posts by year, month and day. It then
	writes an index of years in ``output``, an index of months in each
	year directory and an index of images in each month directory, and
	copies the images into per-day directories.

	Args:
		cf: config mapping as returned by ``load_config``.
		archive: directory containing ``outbox.json``.
		output: root directory of the generated tree.
		colophon: text placed at the top of every index, or None.
		debug: when true, echo each item to stderr before processing it.
	"""
	# Only the parsed JSON is needed afterwards, so the file is closed
	# before any further work is done.
	with open(f"{archive}/outbox.json", "r") as fh:
		js = json.load(fh)

	# posts[year][month][day] is the list of posts for that day, kept in
	# the order in which they appear in the outbox.
	posts = {}
	for item in js["orderedItems"]:
		if item["type"] != "Create":
			continue
		if debug:
			print(item, file=sys.stderr)
		try:
			post = process_post(cf, archive, item["object"], debug)
		except Exception as e:
			# One bad status must not abort the whole conversion: report it
			# on stderr like every other diagnostic and move on. `.get`
			# keeps the report itself from failing on an item with no id.
			print(f"Processing failed: {item.get('id')}: {e}", file=sys.stderr)
			continue
		days = posts.setdefault(post.year, {}).setdefault(post.month, {})
		days.setdefault(post.day, []).append(post)

	outdir = Path(output)
	years = [(f"{year}/", year) for year in posts]
	write_gemfile(outdir, colophon, "Glossatory", years)

	for year, by_month in posts.items():
		ydir = outdir / year
		months = [(f"{month}/", MNAMES[month]) for month in by_month]
		write_gemfile(ydir, colophon, year, months)
		for month, by_day in by_month.items():
			mdir = ydir / month
			# The first entry of every month index links back to its year.
			links = [(f"/glossatory/{year}/", year)]
			for day, day_posts in by_day.items():
				(mdir / day).mkdir(parents=True, exist_ok=True)
				for post in day_posts:
					post.copy_image(output)
					links.append((f"{day}/{post.fname}", post.title))
			write_gemfile(mdir, colophon, f"{MNAMES[month]} {year}", links)
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
	# Command-line entry point: parse the options, load the config and the
	# optional page-header text, then run the conversion.
	parser = argparse.ArgumentParser()
	parser.add_argument(
		"-a",
		"--archive",
		required=True,
		type=str,
		help="ActivityPub archive",
	)
	parser.add_argument(
		"-o",
		"--output",
		required=True,
		type=str,
		help="Output directory",
	)
	parser.add_argument(
		"-c",
		"--config",
		required=True,
		type=str,
		help="Config file",
	)
	parser.add_argument(
		"-t",
		"--text",
		required=False,
		type=str,
		help="File with text to be included at the top of each index page",
	)
	parser.add_argument(
		"-d",
		"--debug",
		action="store_true",
		default=False,
		help="Print debug output",
	)
	args = parser.parse_args()
	# The config is loaded before the header text, as arguments are
	# evaluated left to right.
	apub2gmi(
		load_config(args.config),
		args.archive,
		args.output,
		load_colophon(args.text),
		args.debug,
	)
 |