3 changed files with 30 additions and 34 deletions
--- a/README.md
+++ b/README.md
@ -2,6 +2,4 @@
 This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.
-I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and it assumes that all of the media attachments have a name with
+I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and there are still a few things hard-coded in it which I haven't moved out to a config file, specifically the regular expressions which match attachment URLs and pull bits of text out for the index links.
 two parts you want to pull out of it and use as the human-readable form of the
 index.
--- a/apub2gmi.py
+++ b/apub2gmi.py
@ -26,10 +26,28 @@ MNAMES = {
 }
 HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
 => https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
 => https://botsin.space/@GLOSSATORY Follow the words on Mastodon
 => https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
 => / Back to Mike's gemini
 => /glossatory/ Glossatory archive home
 """
 URL_RE = re.compile(r"^/weirderearth/(.*)$")
 NAME_RES = [
 	re.compile(r"^(.*?)\.\s*(.*)$"),
 	re.compile(r"^(.*?)\s*(The drawing.*)$"),
 	re.compile(r"^A line drawing depicting (.*)$"),
 	re.compile(r"^(.*?): (.*)$"),
 	]
 class MediaPost():
-	def __init__(self, name_res, year, month, day, file, title):
+	def __init__(self, year, month, day, file, title):
 		self.name_res = name_res
 		self.year = year
 		self.month = month
 		self.day = day
@ -47,7 +65,7 @@ class MediaPost():
 			copy(self.file, target)
 	def try_parse(self):
-		for re in self.name_res:
+		for re in NAME_RES:
 			if m := re.match(self.title):
 				self.defn = m.group(1)
 				if len(m.groups()) == 2:
@ -57,19 +75,19 @@ class MediaPost():
 		self.defn = self.title
-def process_post(cf, archive, obj):
+def process_post(archive, obj):
 	date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
 	year = f"{date.year}"
 	month = f"{date.month:02}"
 	day = f"{date.day:02}"
 	attachment = obj["attachment"][0]
 	url = attachment["url"]
-	if m:= cf["url_re"].match(url):
+	if m:= URL_RE.match(url):
 		file = Path(archive) / m.group(1)
 	else:
 		raise ValueError(f"Couldn't match url {url}")
 	alt = attachment["name"]
-	return MediaPost(cf["title_res"], year, month, day, file, alt)
+	return MediaPost(year, month, day, file, alt)
 def ensure_dir(gmdir):
@ -84,15 +102,6 @@ def load_colophon(cfile):
 			return "".join(colophon)
 	return None
 def load_config(config):
 	with open(config, "r") as cfh:
 		cf = json.load(cfh)
 		cf["url_re"] = re.compile(cf["url_re"])
 		cf["title_res"] = [ re.compile(r) for r in cf["title_res"] ]
 	return cf
 def write_gemfile(gmdir, colophon, title, items):
 	ensure_dir(gmdir)
 	gmi = gmdir / "index.gmi"
@ -105,14 +114,14 @@ def write_gemfile(gmdir, colophon, title, items):
 			gfh.write(f"=> {link} {text}\n")
-def apub2gmi(cf, archive, output, colophon):
+def apub2gmi(archive, output, colophon):
 	with open(f"{archive}/outbox.json", "r") as fh:
 		js = json.load(fh)
 		posts = {}
 		for item in js["orderedItems"]:
 			if item["type"] == "Create":
 				try:
-					post = process_post(cf, archive, item["object"])
+					post = process_post(archive, item["object"])
 					if not post.year in posts:
 						posts[post.year] = {}
 					if not post.month in posts[post.year]:
@ -160,13 +169,9 @@ if __name__ == "__main__":
 		'-o', '--output', required=True, type=str, help="Output directory"
 		)
 	ap.add_argument(
-		'-c', '--config', required=True, type=str, help="Config file"
+		'-c', '--colophon', required=False, type=str,
 	)
 	ap.add_argument(
 		'-t', '--text', required=False, type=str,
 		help="File with text to be included at the top of each index page"
 		)
 	args = ap.parse_args()
-	cf = load_config(args.config)
+	colophon = load_colophon(args.colophon)
-	colophon = load_colophon(args.text)
+	apub2gmi(args.archive, args.output, colophon)
 	apub2gmi(cf, args.archive, args.output, colophon)
--- a/config.json
+++ b/config.json
@ -1,7 +0,0 @@
 {
 	"url_re": "^/some_pattern/(.*)$",
 	"title_res": [
 		"^(.*?)\\.\\s*(.*)$",
 		"^(.*?): (.*)$"
 	]
 }