Initial commit
commit
4e3405727d
|
@ -0,0 +1,155 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
# convert the Glossatory archive from an ActivityPub collection to
|
||||
# gemini
|
||||
|
||||
import json
|
||||
import datetime
|
||||
import re
|
||||
from pathlib import Path
|
||||
from shutil import copy
|
||||
|
||||
# Lookup table from a zero-padded month number ("01" .. "12") to the
# English month name shown in the generated index pages.
MNAMES = {
    f"{number:02}": name
    for number, name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}
|
||||
|
||||
# Directory holding the unpacked archive: outbox.json and the media files
# are read relative to it.
ARCHIVE = "archive-20230604031441-05906d0df7f3f14777089c2fd7d0175a"

# Root directory under which the generated gemini tree is written.
OUTDIR = "gemini"

# Gemtext preamble written at the top of every generated index.gmi.
HEADER = (
    "This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.\n"
    "\n"
    "=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon\n"
    "=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon\n"
    '=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"\n'
    "=> / Back to Mike's gemini\n"
    "=> /glossatory/ Glossatory archive home\n"
    "\n"
)
|
||||
|
||||
# Matches an attachment URL rooted at /files.weirder.earth/ and captures the
# remainder, which is used as a path relative to ARCHIVE.
URL_RE = re.compile(
    r"^/files\.weirder\.earth/(.*)$"
)

# Patterns tried in order against an image's alt text. Group 1 becomes the
# definition; group 2, where the pattern has one, becomes the description.
NAME_RES = [
    re.compile(pattern)
    for pattern in (
        r"^(.*?)\.\s*(.*)$",
        r"^(.*?)\s*(The drawing.*)$",
        r"^A line drawing depicting (.*)$",
        r"^(.*?): (.*)$",
    )
]
|
||||
|
||||
|
||||
|
||||
class GlossatoryPost:
    """One archived post: an image file plus the alt text used as its title.

    On construction the title is split into a definition (``defn``) and an
    optional description (``desc``) by trying each pattern in ``NAME_RES``.
    """

    def __init__(self, year, month, day, file, title):
        """Store the post's date parts, image path and title, then parse it.

        Args:
            year: Publication year, used as a directory name.
            month: Publication month, used as a directory name.
            day: Publication day, used as a directory name.
            file: Path of the image file to copy from.
            title: Title text to parse into ``defn`` / ``desc``.
        """
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/`` unless it is already there.

        The destination directory is created when missing, so the method
        works whether or not the caller prepared it beforehand.

        Args:
            root: Root of the output tree (string or path).
        """
        target_dir = Path(root) / self.year / self.month / self.day
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / self.fname
        # An existing target is left untouched rather than overwritten.
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Fill ``defn`` and ``desc`` from the first pattern matching the title.

        When no pattern in ``NAME_RES`` matches, a message is printed and the
        whole title is used as the definition.
        """
        # Named `pattern`, not `re`, so the regex module is not shadowed.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                # Only some patterns capture a second (description) group.
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title
|
||||
|
||||
|
||||
def process_post(obj):
    """Turn one post object from the outbox into a GlossatoryPost.

    The date comes from the first ten characters of ``obj["published"]``;
    the image and its alt text come from the first entry of
    ``obj["attachment"]``.

    Raises:
        ValueError: If the attachment URL does not match ``URL_RE`` (or the
            published date cannot be parsed).
    """
    published = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    first = obj["attachment"][0]
    url = first["url"]
    matched = URL_RE.match(url)
    if not matched:
        raise ValueError(f"Couldn't match url {url}")
    return GlossatoryPost(
        f"{published.year}",
        f"{published.month:02}",
        f"{published.day:02}",
        # The captured part of the URL is a path relative to the archive.
        Path(ARCHIVE) / matched.group(1),
        first["name"],
    )
|
||||
|
||||
|
||||
def ensure_dir(gmdir):
    """Create directory *gmdir*, with any missing parents, if it is absent.

    Args:
        gmdir: A ``pathlib.Path`` naming the directory that must exist.

    Raises:
        FileExistsError: If *gmdir* exists but is not a directory.
    """
    # A single mkdir with exist_ok=True avoids the window between checking
    # for the directory and creating it.
    gmdir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def write_gemfile(gmdir, title, items):
    """Write ``index.gmi`` in *gmdir*: HEADER, a heading, then link lines.

    Args:
        gmdir: ``pathlib.Path`` of the directory to write into; it is
            created if missing.
        title: Text of the level-one heading.
        items: Iterable of ``(link, text)`` pairs, written as one gemtext
            link line each, in order.
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    # Write UTF-8 explicitly instead of relying on the locale's default
    # encoding, which may be unable to represent the link text.
    with open(gmi, "w", encoding="utf-8") as gfh:
        gfh.write(HEADER)
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            # A gemtext link must fit on one line, so any line breaks in the
            # label are folded into spaces.
            label = " ".join(str(text).splitlines())
            gfh.write(f"=> {link} {label}\n")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Load the outbox from the archive. The encoding is given explicitly so the
# result does not depend on the locale's default.
with open(f"{ARCHIVE}/outbox.json", "r", encoding="utf-8") as fh:
    js = json.load(fh)

# posts[year][month][day] -> list of GlossatoryPost, in outbox order.
posts = {}
for item in js["orderedItems"]:
    # Only "Create" activities are turned into posts.
    if item["type"] != "Create":
        continue
    try:
        post = process_post(item["object"])
    except Exception as e:
        # Best effort: report the item that could not be processed and
        # carry on with the rest of the outbox.
        i = item["id"]
        print(f"Processing failed: {i}: {e}")
    else:
        by_day = posts.setdefault(post.year, {}).setdefault(post.month, {})
        by_day.setdefault(post.day, []).append(post)
|
||||
|
||||
# Top-level index: one link per year.
years = [(f"{year}/", year) for year in posts]
write_gemfile(Path(OUTDIR), "Glossatory", years)

for year, year_posts in posts.items():
    # Year index: one link per month, labelled with the month name.
    ydir = Path(OUTDIR) / year
    months = [(f"{month}/", MNAMES[month]) for month in year_posts]
    write_gemfile(ydir, year, months)

    for month, month_posts in year_posts.items():
        mname = MNAMES[month]
        mdir = ydir / month
        # Month index: a link back to the year, then one link per image.
        links = [(f"/glossatory/{year}/", year)]
        for day, day_posts in month_posts.items():
            ddir = mdir / day
            ddir.mkdir(parents=True, exist_ok=True)
            for post in day_posts:
                post.copy_image(OUTDIR)
                links.append((f"{day}/{post.fname}", post.title))
        write_gemfile(mdir, f"{mname} {year}", links)
|
||||
|
Loading…
Reference in New Issue