Compare commits
No commits in common. "60b3dc619edc757073f7cd8a1db028a28f6a60e4" and "18c7f756ae7d6bb6e76754a416eb03646f3d3933" have entirely different histories.
60b3dc619e
...
18c7f756ae
|
@ -2,6 +2,4 @@
|
||||||
|
|
||||||
This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.
|
This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.
|
||||||
|
|
||||||
I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and it assumes that all of the media attachments have a name with
|
I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and there are still a few things hard-coded in it which I haven't moved out to a config file, specifically the regular expressions which match attachment URLs and pull bits of text out for the index links.
|
||||||
two parts you want to pull out of it and use as the human-readable form of the
|
|
||||||
index.
|
|
53
apub2gmi.py
53
apub2gmi.py
|
@ -26,10 +26,28 @@ MNAMES = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
|
||||||
|
|
||||||
|
=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
|
||||||
|
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
|
||||||
|
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
|
||||||
|
=> / Back to Mike's gemini
|
||||||
|
=> /glossatory/ Glossatory archive home
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
URL_RE = re.compile(r"^/weirderearth/(.*)$")
|
||||||
|
NAME_RES = [
|
||||||
|
re.compile(r"^(.*?)\.\s*(.*)$"),
|
||||||
|
re.compile(r"^(.*?)\s*(The drawing.*)$"),
|
||||||
|
re.compile(r"^A line drawing depicting (.*)$"),
|
||||||
|
re.compile(r"^(.*?): (.*)$"),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class MediaPost():
|
class MediaPost():
|
||||||
def __init__(self, name_res, year, month, day, file, title):
|
def __init__(self, year, month, day, file, title):
|
||||||
self.name_res = name_res
|
|
||||||
self.year = year
|
self.year = year
|
||||||
self.month = month
|
self.month = month
|
||||||
self.day = day
|
self.day = day
|
||||||
|
@ -47,7 +65,7 @@ class MediaPost():
|
||||||
copy(self.file, target)
|
copy(self.file, target)
|
||||||
|
|
||||||
def try_parse(self):
|
def try_parse(self):
|
||||||
for re in self.name_res:
|
for re in NAME_RES:
|
||||||
if m := re.match(self.title):
|
if m := re.match(self.title):
|
||||||
self.defn = m.group(1)
|
self.defn = m.group(1)
|
||||||
if len(m.groups()) == 2:
|
if len(m.groups()) == 2:
|
||||||
|
@ -57,19 +75,19 @@ class MediaPost():
|
||||||
self.defn = self.title
|
self.defn = self.title
|
||||||
|
|
||||||
|
|
||||||
def process_post(cf, archive, obj):
|
def process_post(archive, obj):
|
||||||
date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
|
date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
|
||||||
year = f"{date.year}"
|
year = f"{date.year}"
|
||||||
month = f"{date.month:02}"
|
month = f"{date.month:02}"
|
||||||
day = f"{date.day:02}"
|
day = f"{date.day:02}"
|
||||||
attachment = obj["attachment"][0]
|
attachment = obj["attachment"][0]
|
||||||
url = attachment["url"]
|
url = attachment["url"]
|
||||||
if m:= cf["url_re"].match(url):
|
if m:= URL_RE.match(url):
|
||||||
file = Path(archive) / m.group(1)
|
file = Path(archive) / m.group(1)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Couldn't match url {url}")
|
raise ValueError(f"Couldn't match url {url}")
|
||||||
alt = attachment["name"]
|
alt = attachment["name"]
|
||||||
return MediaPost(cf["title_res"], year, month, day, file, alt)
|
return MediaPost(year, month, day, file, alt)
|
||||||
|
|
||||||
|
|
||||||
def ensure_dir(gmdir):
|
def ensure_dir(gmdir):
|
||||||
|
@ -84,15 +102,6 @@ def load_colophon(cfile):
|
||||||
return "".join(colophon)
|
return "".join(colophon)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def load_config(config):
|
|
||||||
with open(config, "r") as cfh:
|
|
||||||
cf = json.load(cfh)
|
|
||||||
cf["url_re"] = re.compile(cf["url_re"])
|
|
||||||
cf["title_res"] = [ re.compile(r) for r in cf["title_res"] ]
|
|
||||||
return cf
|
|
||||||
|
|
||||||
|
|
||||||
def write_gemfile(gmdir, colophon, title, items):
|
def write_gemfile(gmdir, colophon, title, items):
|
||||||
ensure_dir(gmdir)
|
ensure_dir(gmdir)
|
||||||
gmi = gmdir / "index.gmi"
|
gmi = gmdir / "index.gmi"
|
||||||
|
@ -105,14 +114,14 @@ def write_gemfile(gmdir, colophon, title, items):
|
||||||
gfh.write(f"=> {link} {text}\n")
|
gfh.write(f"=> {link} {text}\n")
|
||||||
|
|
||||||
|
|
||||||
def apub2gmi(cf, archive, output, colophon):
|
def apub2gmi(archive, output, colophon):
|
||||||
with open(f"{archive}/outbox.json", "r") as fh:
|
with open(f"{archive}/outbox.json", "r") as fh:
|
||||||
js = json.load(fh)
|
js = json.load(fh)
|
||||||
posts = {}
|
posts = {}
|
||||||
for item in js["orderedItems"]:
|
for item in js["orderedItems"]:
|
||||||
if item["type"] == "Create":
|
if item["type"] == "Create":
|
||||||
try:
|
try:
|
||||||
post = process_post(cf, archive, item["object"])
|
post = process_post(archive, item["object"])
|
||||||
if not post.year in posts:
|
if not post.year in posts:
|
||||||
posts[post.year] = {}
|
posts[post.year] = {}
|
||||||
if not post.month in posts[post.year]:
|
if not post.month in posts[post.year]:
|
||||||
|
@ -160,13 +169,9 @@ if __name__ == "__main__":
|
||||||
'-o', '--output', required=True, type=str, help="Output directory"
|
'-o', '--output', required=True, type=str, help="Output directory"
|
||||||
)
|
)
|
||||||
ap.add_argument(
|
ap.add_argument(
|
||||||
'-c', '--config', required=True, type=str, help="Config file"
|
'-c', '--colophon', required=False, type=str,
|
||||||
)
|
|
||||||
ap.add_argument(
|
|
||||||
'-t', '--text', required=False, type=str,
|
|
||||||
help="File with text to be included at the top of each index page"
|
help="File with text to be included at the top of each index page"
|
||||||
)
|
)
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
cf = load_config(args.config)
|
colophon = load_colophon(args.colophon)
|
||||||
colophon = load_colophon(args.text)
|
apub2gmi(args.archive, args.output, colophon)
|
||||||
apub2gmi(cf, args.archive, args.output, colophon)
|
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
{
|
|
||||||
"url_re": "^/some_pattern/(.*)$",
|
|
||||||
"title_res": [
|
|
||||||
"^(.*?)\\.\\s*(.*)$",
|
|
||||||
"^(.*?): (.*)$"
|
|
||||||
]
|
|
||||||
}
|
|
Loading…
Reference in New Issue