#!/usr/bin/env python

# convert the Glossatory archive from an ActivityPub collection to
# gemini

import argparse
import datetime
import json
import re
from pathlib import Path
from shutil import copy

# Zero-padded month number -> English month name, used for index titles.
MNAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}

# Gemtext banner for the archive (currently unused by the conversion code;
# the per-page preamble comes from the --colophon file instead).
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.

=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"

=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home
"""

# Attachment URLs in the outbox look like "/weirderearth/<relative path>".
URL_RE = re.compile(r"^/weirderearth/(.*)$")

# Patterns for splitting an image's alt text into (definition, description),
# tried in order; the third pattern captures only a definition.
NAME_RES = [
    re.compile(r"^(.*?)\.\s*(.*)$"),
    re.compile(r"^(.*?)\s*(The drawing.*)$"),
    re.compile(r"^A line drawing depicting (.*)$"),
    re.compile(r"^(.*?): (.*)$"),
]


class MediaPost:
    """A single dated image post pulled out of the ActivityPub outbox."""

    def __init__(self, year, month, day, file, title):
        self.year = year        # four-digit year as a string
        self.month = month      # zero-padded month as a string
        self.day = day          # zero-padded day as a string
        self.file = file        # path to the source image in the archive
        self.fname = Path(file).name
        self.title = title      # raw alt text from the attachment
        self.defn = ""          # parsed definition (headword)
        self.desc = ""          # parsed description, if any
        self.try_parse()

    def copy_image(self, root):
        """Copy this post's image into root/year/month/day/ unless it already exists."""
        d = Path(root) / self.year / self.month / self.day
        target = d / self.fname
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Split the alt-text title into self.defn / self.desc.

        Tries each pattern in NAME_RES in order; on failure, reports the
        title and falls back to using the whole title as the definition.
        """
        # BUG FIX: the loop variable was previously named `re`, shadowing
        # the imported `re` module inside this method.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                # The "A line drawing depicting ..." pattern has only one group.
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title


def process_post(archive, obj):
    """Build a MediaPost from an ActivityPub Create object.

    archive -- root directory of the downloaded ActivityPub archive
    obj     -- the "object" member of a Create activity

    Raises ValueError when the first attachment's URL does not match URL_RE.
    """
    date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    year = f"{date.year}"
    month = f"{date.month:02}"
    day = f"{date.day:02}"
    attachment = obj["attachment"][0]
    url = attachment["url"]
    if m := URL_RE.match(url):
        file = Path(archive) / m.group(1)
    else:
        raise ValueError(f"Couldn't match url {url}")
    alt = attachment["name"]
    return MediaPost(year, month, day, file, alt)


def ensure_dir(gmdir):
    """Create gmdir (and parents) if it does not already exist."""
    if not gmdir.is_dir():
        gmdir.mkdir(parents=True)


def load_colophon(cfile):
    """Return the contents of cfile as one string, or None when cfile is falsy."""
    if cfile:
        with open(cfile, "r") as cfh:
            colophon = cfh.readlines()
        return "".join(colophon)
    return None


def write_gemfile(gmdir, colophon, title, items):
    """Write gmdir/index.gmi: optional colophon, a title, then gemtext links.

    items -- iterable of (link, text) pairs rendered as "=> link text" lines
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    with open(gmi, "w") as gfh:
        if colophon:
            gfh.write(colophon)
            gfh.write("\n\n")
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            gfh.write(f"=> {link} {text}\n")


def apub2gmi(archive, output, colophon):
    """Convert the archive's outbox.json into a year/month tree of gemini pages.

    Copies each post's image into output/year/month/day/ and writes index.gmi
    files at the top level, per year and per month.
    """
    with open(f"{archive}/outbox.json", "r") as fh:
        js = json.load(fh)

    # posts[year][month][day] -> list of MediaPost
    posts = {}
    for item in js["orderedItems"]:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(archive, item["object"])
            bymonth = posts.setdefault(post.year, {})
            byday = bymonth.setdefault(post.month, {})
            byday.setdefault(post.day, []).append(post)
        except Exception as e:
            # Best-effort conversion: report the failing item and keep going.
            i = item["id"]
            print(f"Processing failed: {i}: {e}")

    years = [(f"{year}/", year) for year in posts]
    write_gemfile(Path(output), colophon, "Glossatory", years)

    for year in posts:
        ydir = Path(output) / year
        months = [(f"{month}/", MNAMES[month]) for month in posts[year]]
        write_gemfile(ydir, colophon, year, months)
        for month in posts[year]:
            mname = MNAMES[month]
            mdir = ydir / month
            for day in posts[year][month]:
                ddir = mdir / day
                ddir.mkdir(parents=True, exist_ok=True)
                for post in posts[year][month][day]:
                    post.copy_image(output)
            # Month index: a link back to the year page, then one link per post.
            links = [(f"/glossatory/{year}/", year)]
            for day in posts[year][month]:
                for post in posts[year][month][day]:
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, colophon, f"{mname} {year}", links)


if __name__ == "__main__":
    ap = argparse.ArgumentParser()
    ap.add_argument(
        '-a', '--archive', required=True, type=str,
        help="ActivityPub archive"
    )
    ap.add_argument(
        '-o', '--output', required=True, type=str,
        help="Output directory"
    )
    ap.add_argument(
        '-c', '--colophon', required=False, type=str,
        help="File with text to be included at the top of each index page"
    )
    args = ap.parse_args()
    colophon = load_colophon(args.colophon)
    apub2gmi(args.archive, args.output, colophon)