# Commentary on the patch below (the patch text itself is unchanged).
#
# Purpose: move the archive-specific regular expressions out of apub2gmi.py
# and into a JSON config file that is loaded at start-up.
#
# apub2gmi.py:
#   * Removes the module-level constants HEADER, URL_RE and NAME_RES.
#   * MediaPost.__init__ takes a new leading `name_res` argument and stores it
#     as self.name_res; try_parse() iterates self.name_res instead of NAME_RES.
#   * process_post() and apub2gmi() take a new leading `cf` argument.
#     process_post() matches the attachment URL against cf["url_re"] and hands
#     cf["title_res"] to MediaPost.
#   * Adds load_config(config): reads the file with json.load, then replaces
#     cf["url_re"] with a compiled pattern and cf["title_res"] with a list of
#     compiled patterns before returning the dict.
#   * Command line: the optional -c/--colophon option becomes a required
#     -c/--config option; a new optional -t/--text option now supplies the
#     file passed to load_colophon().
#
# config.json (new file):
#   * Sample config with one "url_re" pattern and two "title_res" patterns.
#
# NOTE(review): the patch's line breaks appear to have been collapsed onto two
#   lines here, so hunk line counts and indentation cannot be checked from
#   this view -- confirm against the original patch before applying.
# NOTE(review): -c keeps its letter but changes meaning (colophon -> config);
#   an existing invocation that passes a colophon file via -c would now have
#   that file read by load_config() -- confirm callers are updated.
# NOTE(review): try_parse() uses `re` as its loop variable, which rebinds the
#   name inside that method; the removed line did the same.
# NOTE(review): load_config() opens the file without an explicit encoding and
#   indexes "url_re"/"title_res" directly, so a config missing either key
#   raises KeyError -- confirm that is the intended failure mode.
# NOTE(review): the sample config lists two title patterns; the removed
#   NAME_RES list had four (the "The drawing" and "A line drawing depicting"
#   patterns are not in the sample) -- presumably intentional, verify.
#
diff --git a/apub2gmi.py b/apub2gmi.py index 32d2336..2694097 100755 --- a/apub2gmi.py +++ b/apub2gmi.py @@ -26,28 +26,10 @@ MNAMES = { } -HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database. - -=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon -=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon -=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e" -=> / Back to Mike's gemini -=> /glossatory/ Glossatory archive home - -""" - -URL_RE = re.compile(r"^/weirderearth/(.*)$") -NAME_RES = [ - re.compile(r"^(.*?)\.\s*(.*)$"), - re.compile(r"^(.*?)\s*(The drawing.*)$"), - re.compile(r"^A line drawing depicting (.*)$"), - re.compile(r"^(.*?): (.*)$"), - ] - - class MediaPost(): - def __init__(self, year, month, day, file, title): + def __init__(self, name_res, year, month, day, file, title): + self.name_res = name_res self.year = year self.month = month self.day = day @@ -65,7 +47,7 @@ class MediaPost(): copy(self.file, target) def try_parse(self): - for re in NAME_RES: + for re in self.name_res: if m := re.match(self.title): self.defn = m.group(1) if len(m.groups()) == 2: @@ -75,19 +57,19 @@ class MediaPost(): self.defn = self.title -def process_post(archive, obj): +def process_post(cf, archive, obj): date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d") year = f"{date.year}" month = f"{date.month:02}" day = f"{date.day:02}" attachment = obj["attachment"][0] url = attachment["url"] - if m:= URL_RE.match(url): + if m:= cf["url_re"].match(url): file = Path(archive) / m.group(1) else: raise ValueError(f"Couldn't match url {url}") alt = attachment["name"] - return MediaPost(year, month, day, file, alt) + return MediaPost(cf["title_res"], year, month, day, file, alt) def ensure_dir(gmdir): @@ -102,6 +84,15 @@ def load_colophon(cfile): return "".join(colophon) return None + +def 
load_config(config): + with open(config, "r") as cfh: + cf = json.load(cfh) + cf["url_re"] = re.compile(cf["url_re"]) + cf["title_res"] = [ re.compile(r) for r in cf["title_res"] ] + return cf + + def write_gemfile(gmdir, colophon, title, items): ensure_dir(gmdir) gmi = gmdir / "index.gmi" @@ -114,14 +105,14 @@ def write_gemfile(gmdir, colophon, title, items): gfh.write(f"=> {link} {text}\n") -def apub2gmi(archive, output, colophon): +def apub2gmi(cf, archive, output, colophon): with open(f"{archive}/outbox.json", "r") as fh: js = json.load(fh) posts = {} for item in js["orderedItems"]: if item["type"] == "Create": try: - post = process_post(archive, item["object"]) + post = process_post(cf, archive, item["object"]) if not post.year in posts: posts[post.year] = {} if not post.month in posts[post.year]: @@ -169,9 +160,13 @@ if __name__ == "__main__": '-o', '--output', required=True, type=str, help="Output directory" ) ap.add_argument( - '-c', '--colophon', required=False, type=str, + '-c', '--config', required=True, type=str, help="Config file" + ) + ap.add_argument( + '-t', '--text', required=False, type=str, help="File with text to be included at the top of each index page" ) args = ap.parse_args() - colophon = load_colophon(args.colophon) - apub2gmi(args.archive, args.output, colophon) + cf = load_config(args.config) + colophon = load_colophon(args.text) + apub2gmi(cf, args.archive, args.output, colophon) diff --git a/config.json b/config.json new file mode 100644 index 0000000..737c9be --- /dev/null +++ b/config.json @@ -0,0 +1,7 @@ +{ + "url_re": "^/some_pattern/(.*)$", + "title_res": [ + "^(.*?)\\.\\s*(.*)$", + "^(.*?): (.*)$" + ] +}