Compare commits

..

No commits in common. "60b3dc619edc757073f7cd8a1db028a28f6a60e4" and "18c7f756ae7d6bb6e76754a416eb03646f3d3933" have entirely different histories.

3 changed files with 30 additions and 34 deletions

View File

@ -2,6 +2,4 @@
This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server. This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.
I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and it assumes that all of the media attachments have a name with I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and there are still a few things hard-coded in it which I haven't moved out to a config file, specifically the regular expressions which match attachment URLs and pull bits of text out for the index links.
two parts you want to pull out of it and use as the human-readable form of the
index.

View File

@ -26,10 +26,28 @@ MNAMES = {
} }
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home
"""
# Matches an attachment URL beginning with "/weirderearth/" and captures the
# remainder; process_post joins that capture onto the archive directory to
# locate the media file, and raises ValueError when the URL does not match.
URL_RE = re.compile(r"^/weirderearth/(.*)$")
# Patterns that MediaPost.try_parse iterates over, in list order, matching each
# against the post title (the attachment's "name" field). Group 1 of a match
# is stored as the post's `defn`.
NAME_RES = [
# "<head>. <rest>" - head is everything before the first full stop.
re.compile(r"^(.*?)\.\s*(.*)$"),
# "<head> The drawing ..." - second group starts at "The drawing".
re.compile(r"^(.*?)\s*(The drawing.*)$"),
# "A line drawing depicting <subject>" - single capture group only.
re.compile(r"^A line drawing depicting (.*)$"),
# "<head>: <rest>" - split at the first colon-plus-space.
re.compile(r"^(.*?): (.*)$"),
]
class MediaPost(): class MediaPost():
def __init__(self, name_res, year, month, day, file, title): def __init__(self, year, month, day, file, title):
self.name_res = name_res
self.year = year self.year = year
self.month = month self.month = month
self.day = day self.day = day
@ -47,7 +65,7 @@ class MediaPost():
copy(self.file, target) copy(self.file, target)
def try_parse(self): def try_parse(self):
for re in self.name_res: for re in NAME_RES:
if m := re.match(self.title): if m := re.match(self.title):
self.defn = m.group(1) self.defn = m.group(1)
if len(m.groups()) == 2: if len(m.groups()) == 2:
@ -57,19 +75,19 @@ class MediaPost():
self.defn = self.title self.defn = self.title
def process_post(cf, archive, obj): def process_post(archive, obj):
date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d") date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
year = f"{date.year}" year = f"{date.year}"
month = f"{date.month:02}" month = f"{date.month:02}"
day = f"{date.day:02}" day = f"{date.day:02}"
attachment = obj["attachment"][0] attachment = obj["attachment"][0]
url = attachment["url"] url = attachment["url"]
if m:= cf["url_re"].match(url): if m:= URL_RE.match(url):
file = Path(archive) / m.group(1) file = Path(archive) / m.group(1)
else: else:
raise ValueError(f"Couldn't match url {url}") raise ValueError(f"Couldn't match url {url}")
alt = attachment["name"] alt = attachment["name"]
return MediaPost(cf["title_res"], year, month, day, file, alt) return MediaPost(year, month, day, file, alt)
def ensure_dir(gmdir): def ensure_dir(gmdir):
@ -84,15 +102,6 @@ def load_colophon(cfile):
return "".join(colophon) return "".join(colophon)
return None return None
def load_config(config):
    """Load the JSON config file and compile the regular expressions in it.

    Args:
        config: Path of a JSON file containing a "url_re" pattern string and
            a "title_res" list of pattern strings.

    Returns:
        The parsed dict, with "url_re" replaced by its compiled pattern and
        "title_res" replaced by a list of compiled patterns. Any other keys
        are passed through unchanged.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
        KeyError: if "url_re" or "title_res" is missing.
        re.error: if one of the patterns is not a valid regular expression.
    """
    # JSON text is UTF-8; read it as such rather than relying on the locale's
    # default encoding, which would garble non-ASCII characters in a pattern.
    with open(config, "r", encoding="utf-8") as cfh:
        cf = json.load(cfh)
    cf["url_re"] = re.compile(cf["url_re"])
    cf["title_res"] = [re.compile(r) for r in cf["title_res"]]
    return cf
def write_gemfile(gmdir, colophon, title, items): def write_gemfile(gmdir, colophon, title, items):
ensure_dir(gmdir) ensure_dir(gmdir)
gmi = gmdir / "index.gmi" gmi = gmdir / "index.gmi"
@ -105,14 +114,14 @@ def write_gemfile(gmdir, colophon, title, items):
gfh.write(f"=> {link} {text}\n") gfh.write(f"=> {link} {text}\n")
def apub2gmi(cf, archive, output, colophon): def apub2gmi(archive, output, colophon):
with open(f"{archive}/outbox.json", "r") as fh: with open(f"{archive}/outbox.json", "r") as fh:
js = json.load(fh) js = json.load(fh)
posts = {} posts = {}
for item in js["orderedItems"]: for item in js["orderedItems"]:
if item["type"] == "Create": if item["type"] == "Create":
try: try:
post = process_post(cf, archive, item["object"]) post = process_post(archive, item["object"])
if not post.year in posts: if not post.year in posts:
posts[post.year] = {} posts[post.year] = {}
if not post.month in posts[post.year]: if not post.month in posts[post.year]:
@ -160,13 +169,9 @@ if __name__ == "__main__":
'-o', '--output', required=True, type=str, help="Output directory" '-o', '--output', required=True, type=str, help="Output directory"
) )
ap.add_argument( ap.add_argument(
'-c', '--config', required=True, type=str, help="Config file" '-c', '--colophon', required=False, type=str,
)
ap.add_argument(
'-t', '--text', required=False, type=str,
help="File with text to be included at the top of each index page" help="File with text to be included at the top of each index page"
) )
args = ap.parse_args() args = ap.parse_args()
cf = load_config(args.config) colophon = load_colophon(args.colophon)
colophon = load_colophon(args.text) apub2gmi(args.archive, args.output, colophon)
apub2gmi(cf, args.archive, args.output, colophon)

View File

@ -1,7 +0,0 @@
{
"url_re": "^/some_pattern/(.*)$",
"title_res": [
"^(.*?)\\.\\s*(.*)$",
"^(.*?): (.*)$"
]
}