Made the script more mature, updated the media attachment RE
parent
4e3405727d
commit
564f366ae2
110
apub2gmi.py
110
apub2gmi.py
|
@ -3,8 +3,9 @@
|
||||||
# convert the Glossatory archive from an ActivityPub collection to
|
# convert the Glossatory archive from an ActivityPub collection to
|
||||||
# gemini
|
# gemini
|
||||||
|
|
||||||
import json
|
import argparse
|
||||||
import datetime
|
import datetime
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import copy
|
from shutil import copy
|
||||||
|
@ -24,8 +25,6 @@ MNAMES = {
|
||||||
"12": "December",
|
"12": "December",
|
||||||
}
|
}
|
||||||
|
|
||||||
ARCHIVE = "archive-20230604031441-05906d0df7f3f14777089c2fd7d0175a"
|
|
||||||
OUTDIR = "gemini"
|
|
||||||
|
|
||||||
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
|
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
|
||||||
|
|
||||||
|
@ -37,7 +36,7 @@ HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
URL_RE = re.compile(r"^/files\.weirder\.earth/(.*)$")
|
URL_RE = re.compile(r"^/weirderearth/(.*)$")
|
||||||
NAME_RES = [
|
NAME_RES = [
|
||||||
re.compile(r"^(.*?)\.\s*(.*)$"),
|
re.compile(r"^(.*?)\.\s*(.*)$"),
|
||||||
re.compile(r"^(.*?)\s*(The drawing.*)$"),
|
re.compile(r"^(.*?)\s*(The drawing.*)$"),
|
||||||
|
@ -76,7 +75,7 @@ class GlossatoryPost():
|
||||||
self.defn = self.title
|
self.defn = self.title
|
||||||
|
|
||||||
|
|
||||||
def process_post(obj):
|
def process_post(archive, obj):
|
||||||
date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
|
date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
|
||||||
year = f"{date.year}"
|
year = f"{date.year}"
|
||||||
month = f"{date.month:02}"
|
month = f"{date.month:02}"
|
||||||
|
@ -84,7 +83,7 @@ def process_post(obj):
|
||||||
attachment = obj["attachment"][0]
|
attachment = obj["attachment"][0]
|
||||||
url = attachment["url"]
|
url = attachment["url"]
|
||||||
if m:= URL_RE.match(url):
|
if m:= URL_RE.match(url):
|
||||||
file = Path(ARCHIVE) / m.group(1)
|
file = Path(archive) / m.group(1)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Couldn't match url {url}")
|
raise ValueError(f"Couldn't match url {url}")
|
||||||
alt = attachment["name"]
|
alt = attachment["name"]
|
||||||
|
@ -106,50 +105,59 @@ def write_gemfile(gmdir, title, items):
|
||||||
gfh.write(f"=> {link} {text}\n")
|
gfh.write(f"=> {link} {text}\n")
|
||||||
|
|
||||||
|
|
||||||
|
def apub2gmi(archive, output):
    """Convert an ActivityPub archive into a tree of Gemini index pages.

    Loads ``outbox.json`` from ``archive``, passes the ``object`` of every
    ``Create`` activity to ``process_post``, and groups the resulting posts
    by their ``year``, ``month`` and ``day`` attributes. It then calls
    ``write_gemfile`` to emit:

    * a top-level "Glossatory" page linking to each year,
    * one page per year linking to each month (named via ``MNAMES``),
    * one page per month linking back to the year and on to every post.

    A directory is created for every day that has posts, and
    ``post.copy_image(output)`` is called for each post (presumably this
    copies the attached image into the tree -- confirm against
    ``GlossatoryPost``).

    Args:
        archive: Directory containing ``outbox.json``; also forwarded to
            ``process_post``.
        output: Root directory that receives the generated tree.

    Items that raise during processing are reported on stdout and skipped,
    so a single bad post does not abort the whole conversion.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default.
    with open(f"{archive}/outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)

    # posts[year][month][day] -> list of posts, in outbox order.
    posts = {}
    for item in js["orderedItems"]:
        if item.get("type") != "Create":
            continue
        try:
            post = process_post(archive, item["object"])
            by_month = posts.setdefault(post.year, {})
            by_day = by_month.setdefault(post.month, {})
            by_day.setdefault(post.day, []).append(post)
        except Exception as e:
            # Deliberately best-effort: report and carry on.  Use .get()
            # so a missing id cannot raise from inside the handler.
            i = item.get("id", "<no id>")
            print(f"Processing failed: {i}: {e}")

    out_root = Path(output)
    years = [(f"{year}/", year) for year in posts]
    write_gemfile(out_root, "Glossatory", years)

    for year, by_month in posts.items():
        ydir = out_root / year
        months = [(f"{month}/", MNAMES[month]) for month in by_month]
        write_gemfile(ydir, year, months)
        for month, by_day in by_month.items():
            mname = MNAMES[month]
            mdir = ydir / month
            # The first link on a month page points back to its year.
            links = [(f"/glossatory/{year}/", year)]
            for day, day_posts in by_day.items():
                (mdir / day).mkdir(parents=True, exist_ok=True)
                for post in day_posts:
                    post.copy_image(output)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, f"{mname} {year}", links)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
with open(f"{ARCHIVE}/outbox.json", "r") as fh:
|
ap = argparse.ArgumentParser()
|
||||||
js = json.load(fh)
|
ap.add_argument(
|
||||||
posts = {}
|
'-a', '--archive', required=True, type=str, help="ActivityPub archive"
|
||||||
for item in js["orderedItems"]:
|
)
|
||||||
if item["type"] == "Create":
|
ap.add_argument(
|
||||||
try:
|
'-o', '--output', required=True, type=str, help="Output directory"
|
||||||
post = process_post(item["object"])
|
)
|
||||||
if not post.year in posts:
|
args = ap.parse_args()
|
||||||
posts[post.year] = {}
|
apub2gmi(args.archive, args.output)
|
||||||
if not post.month in posts[post.year]:
|
|
||||||
posts[post.year][post.month] = {}
|
|
||||||
m = posts[post.year][post.month]
|
|
||||||
if not post.day in m:
|
|
||||||
m[post.day] = [ post ]
|
|
||||||
else:
|
|
||||||
m[post.day].append(post)
|
|
||||||
except Exception as e:
|
|
||||||
i = item["id"]
|
|
||||||
print(f"Processing failed: {i}: {e}")
|
|
||||||
|
|
||||||
years = [ ( f"{year}/", year ) for year in posts ]
|
|
||||||
write_gemfile(Path(OUTDIR), "Glossatory", years)
|
|
||||||
|
|
||||||
for year in posts:
|
|
||||||
ydir = Path(OUTDIR) / year
|
|
||||||
months = [ ( f"{month}/", MNAMES[month] ) for month in posts[year] ]
|
|
||||||
write_gemfile(ydir, year, months)
|
|
||||||
for month in posts[year]:
|
|
||||||
mname = MNAMES[month]
|
|
||||||
mdir = ydir / month
|
|
||||||
for day in posts[year][month]:
|
|
||||||
ddir = mdir / day
|
|
||||||
ddir.mkdir(parents=True, exist_ok=True)
|
|
||||||
for post in posts[year][month][day]:
|
|
||||||
post.copy_image(OUTDIR)
|
|
||||||
gmi = mdir / "index.gmi"
|
|
||||||
links = [
|
|
||||||
( f"/glossatory/{year}/", year),
|
|
||||||
]
|
|
||||||
for day in posts[year][month]:
|
|
||||||
for post in posts[year][month][day]:
|
|
||||||
links.append((f"{day}/{post.fname}", post.title))
|
|
||||||
write_gemfile(mdir, f"{mname} {year}", links)
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue