3 changed files with 30 additions and 34 deletions
--- a/README.md
+++ b/README.md
@ -2,6 +2,4 @@

 This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.

-I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and it assumes that all of the media attachments have a name with
-two parts you want to pull out of it and use as the human-readable form of the
-index.
+I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and there are still a few things hard-coded in it which I haven't moved out to a config file: specifically, the regular expressions which match attachment URLs and pull bits of text out for the index links.
--- a/apub2gmi.py
+++ b/apub2gmi.py
@ -26,10 +26,28 @@ MNAMES = {
 }


+HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
+
+=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
+=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
+=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
+=> / Back to Mike's gemini
+=> /glossatory/ Glossatory archive home
+
+"""
+
+URL_RE = re.compile(r"^/weirderearth/(.*)$")
+NAME_RES = [
+	re.compile(r"^(.*?)\.\s*(.*)$"),
+	re.compile(r"^(.*?)\s*(The drawing.*)$"),
+	re.compile(r"^A line drawing depicting (.*)$"),
+	re.compile(r"^(.*?): (.*)$"),
+	]
+
+

 class MediaPost():
-	def __init__(self, name_res, year, month, day, file, title):
-		self.name_res = name_res
+	def __init__(self, year, month, day, file, title):
 		self.year = year
 		self.month = month
 		self.day = day
@ -47,7 +65,7 @@ class MediaPost():
 			copy(self.file, target)

 	def try_parse(self):
-		for re in self.name_res:
+		for re in NAME_RES:
 			if m := re.match(self.title):
 				self.defn = m.group(1)
 				if len(m.groups()) == 2:
@ -57,19 +75,19 @@ class MediaPost():
 		self.defn = self.title


-def process_post(cf, archive, obj):
+def process_post(archive, obj):
 	date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
 	year = f"{date.year}"
 	month = f"{date.month:02}"
 	day = f"{date.day:02}"
 	attachment = obj["attachment"][0]
 	url = attachment["url"]
-	if m:= cf["url_re"].match(url):
+	if m:= URL_RE.match(url):
 		file = Path(archive) / m.group(1)
 	else:
 		raise ValueError(f"Couldn't match url {url}")
 	alt = attachment["name"]
-	return MediaPost(cf["title_res"], year, month, day, file, alt)
+	return MediaPost(year, month, day, file, alt)


 def ensure_dir(gmdir):
@ -84,15 +102,6 @@ def load_colophon(cfile):
 			return "".join(colophon)
 	return None

-
-def load_config(config):
-	with open(config, "r") as cfh:
-		cf = json.load(cfh)
-		cf["url_re"] = re.compile(cf["url_re"])
-		cf["title_res"] = [ re.compile(r) for r in cf["title_res"] ]
-	return cf
-
-
 def write_gemfile(gmdir, colophon, title, items):
 	ensure_dir(gmdir)
 	gmi = gmdir / "index.gmi"
@ -105,14 +114,14 @@ def write_gemfile(gmdir, colophon, title, items):
 			gfh.write(f"=> {link} {text}\n")


-def apub2gmi(cf, archive, output, colophon):
+def apub2gmi(archive, output, colophon):
 	with open(f"{archive}/outbox.json", "r") as fh:
 		js = json.load(fh)
 		posts = {}
 		for item in js["orderedItems"]:
 			if item["type"] == "Create":
 				try:
-					post = process_post(cf, archive, item["object"])
+					post = process_post(archive, item["object"])
 					if not post.year in posts:
 						posts[post.year] = {}
 					if not post.month in posts[post.year]:
@ -160,13 +169,9 @@ if __name__ == "__main__":
 		'-o', '--output', required=True, type=str, help="Output directory"
 		)
 	ap.add_argument(
-		'-c', '--config', required=True, type=str, help="Config file"
-	)
-	ap.add_argument(
-		'-t', '--text', required=False, type=str,
+		'-c', '--colophon', required=False, type=str,
 		help="File with text to be included at the top of each index page"
 		)
 	args = ap.parse_args()
-	cf = load_config(args.config)
-	colophon = load_colophon(args.text)
-	apub2gmi(cf, args.archive, args.output, colophon)
+	colophon = load_colophon(args.colophon)
+	apub2gmi(args.archive, args.output, colophon)
--- a/config.json
+++ b/config.json
@ -1,7 +0,0 @@
-{
-	"url_re": "^/some_pattern/(.*)$",
-	"title_res": [
-		"^(.*?)\\.\\s*(.*)$",
-		"^(.*?): (.*)$"
-	]
-}