2024-05-04 03:33:34 +00:00
#!/usr/bin/env python
# convert the Glossatory archive from an ActivityPub collection to
# gemini
2024-05-04 03:52:19 +00:00
import argparse
2024-05-04 03:33:34 +00:00
import datetime
2024-05-04 03:52:19 +00:00
import json
2024-05-04 03:33:34 +00:00
import re
from pathlib import Path
from shutil import copy
# Month names keyed by zero-padded two-digit month number ("01".."12"),
# the same form process_post() produces for a post's month.
MNAMES = {
    f"{number:02}": name
    for number, name in enumerate(
        (
            "January",
            "February",
            "March",
            "April",
            "May",
            "June",
            "July",
            "August",
            "September",
            "October",
            "November",
            "December",
        ),
        start=1,
    )
}

# Gemtext introduction for the archive: one paragraph followed by link lines.
HEADER = (
    "This is the archive of GLOSSATORY, an illustrated companion to a bot "
    "which generates dictionary entries based on an RNN trained on the "
    "WordNet lexical database.\n"
    "=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon\n"
    "=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon\n"
    "=> https://oulipo.social/@GLOSSATORY Follow the words without the "
    'letter "e"\n'
    "=> / Back to Mike's gemini\n"
    "=> /glossatory/ Glossatory archive home\n"
)

# Attachment URLs are expected to start with /weirderearth/; the captured
# remainder is joined onto the archive directory by process_post().
URL_RE = re.compile(r"^/weirderearth/(.*)$")

# Patterns tried in order against an image's title by MediaPost.try_parse():
# group 1 becomes the definition and group 2, when the pattern has one,
# the description.
NAME_RES = [
    re.compile(pattern)
    for pattern in (
        r"^(.*?)\.\s*(.*)$",
        r"^(.*?)\s*(The drawing.*)$",
        r"^A line drawing depicting (.*)$",
        r"^(.*?): (.*)$",
    )
]
2024-05-04 04:14:36 +00:00
class MediaPost:
    """One archived image post, filed under its year/month/day.

    Attributes:
        year, month, day: date parts; copy_image() uses them as directory
            names.
        file: path of the source image.
        fname: final path component of ``file``.
        title: the title text passed to the constructor.
        defn: first capture group of the first NAME_RES pattern that
            matches ``title``, or the whole title when none match.
        desc: second capture group of that pattern when it has two groups,
            otherwise "".
    """

    def __init__(self, year, month, day, file, title):
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/fname`` unless it exists.

        The destination directory is created when missing, so the method
        does not depend on the caller having prepared it.
        """
        target_dir = Path(root) / self.year / self.month / self.day
        target = target_dir / self.fname
        if not target.exists():
            target_dir.mkdir(parents=True, exist_ok=True)
            copy(self.file, target)

    def try_parse(self):
        """Split ``title`` into ``defn`` and ``desc`` using NAME_RES.

        Patterns are tried in order and the first match wins. When nothing
        matches, a message is printed and the whole title becomes ``defn``.
        """
        # The loop variable is deliberately not called "re", which would
        # shadow the regex module inside this method.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title
2024-05-04 03:52:19 +00:00
def process_post(archive, obj):
    """Build a MediaPost from one ActivityPub object.

    The date comes from the first ten characters of ``obj["published"]``
    (parsed as YYYY-MM-DD); only the first entry of ``obj["attachment"]``
    is used. Its ``url`` must match URL_RE, and the captured part is
    resolved against *archive*; its ``name`` is passed on as the title.

    Raises:
        ValueError: if the attachment URL does not match URL_RE (or the
            date cannot be parsed).
    """
    published = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    first = obj["attachment"][0]
    media_url = first["url"]

    matched = URL_RE.match(media_url)
    if matched is None:
        raise ValueError(f"Couldn't match url {media_url}")

    return MediaPost(
        f"{published.year}",
        f"{published.month:02}",
        f"{published.day:02}",
        Path(archive) / matched.group(1),
        first["name"],
    )
2024-05-04 03:33:34 +00:00
def ensure_dir(gmdir):
    """Create directory *gmdir*, including missing parents, if needed.

    Accepts a ``Path`` or a path string. An already existing directory is
    left alone; ``FileExistsError`` is raised if the path exists but is not
    a directory.
    """
    # exist_ok=True replaces the separate is_dir() check, so there is no
    # window between checking and creating.
    Path(gmdir).mkdir(parents=True, exist_ok=True)
2024-05-04 04:14:36 +00:00
def load_colophon(cfile):
    """Return the full text of *cfile*, or ``None`` when no file is given.

    Args:
        cfile: path of the colophon file; ``None`` or "" means "no colophon".

    The file is read as UTF-8 so the result does not depend on the
    platform's default encoding.
    """
    if not cfile:
        return None
    with open(cfile, "r", encoding="utf-8") as cfh:
        return cfh.read()
def write_gemfile(gmdir, colophon, title, items):
    """Write ``index.gmi`` in *gmdir*: colophon, heading, then link lines.

    Args:
        gmdir: directory (a ``Path``) to write into; created if missing.
        colophon: text written first, followed by a blank line; skipped
            when empty or ``None``.
        title: text of the ``#`` heading.
        items: iterable of ``(link, text)`` pairs, one ``=>`` line each.
    """
    ensure_dir(gmdir)
    gmi = gmdir / "index.gmi"
    # UTF-8 is written explicitly: labels are free-form text and must not
    # depend on the platform's default encoding.
    with open(gmi, "w", encoding="utf-8") as gfh:
        if colophon:
            gfh.write(colophon)
            gfh.write("\n\n")
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            # A gemtext link is a single line; join any line breaks in the
            # label with spaces so the rest is not emitted as loose text.
            label = " ".join(str(text).splitlines())
            gfh.write(f"=> {link} {label}\n")
2024-05-04 04:14:36 +00:00
def _group_posts(archive, items):
    """Return ``posts[year][month][day] -> [MediaPost, ...]`` for *items*.

    Only items whose type is "Create" are considered. An item that fails to
    convert is reported on stdout and skipped, so one bad entry does not
    abort the whole run.
    """
    posts = {}
    for item in items:
        if item["type"] != "Create":
            continue
        try:
            post = process_post(archive, item["object"])
        except Exception as e:  # best effort: report the item and carry on
            i = item["id"]
            print(f"Processing failed: {i}: {e}")
            continue
        days = posts.setdefault(post.year, {}).setdefault(post.month, {})
        days.setdefault(post.day, []).append(post)
    return posts


def apub2gmi(archive, output, colophon):
    """Convert the ActivityPub archive in *archive* to gemtext under *output*.

    Reads ``outbox.json`` from *archive*, then writes:
      * ``output/index.gmi`` linking to each year,
      * ``output/<year>/index.gmi`` linking to each month,
      * ``output/<year>/<month>/index.gmi`` linking back to the year and to
        every image, with images copied to ``<year>/<month>/<day>/``.

    Args:
        archive: directory containing ``outbox.json`` and the media files.
        output: root directory of the generated capsule.
        colophon: text placed at the top of every index page, or ``None``.
    """
    # JSON is read as UTF-8 explicitly rather than with the locale default.
    with open(Path(archive) / "outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)
    posts = _group_posts(archive, js["orderedItems"])

    out = Path(output)
    years = [(f"{year}/", year) for year in posts]
    write_gemfile(out, colophon, "Glossatory", years)

    for year, by_month in posts.items():
        ydir = out / year
        months = [(f"{month}/", MNAMES[month]) for month in by_month]
        write_gemfile(ydir, colophon, year, months)

        for month, by_day in by_month.items():
            mname = MNAMES[month]
            mdir = ydir / month
            # The first link on a month page leads back to the year index.
            links = [
                (f"/glossatory/{year}/", year),
            ]
            for day, day_posts in by_day.items():
                (mdir / day).mkdir(parents=True, exist_ok=True)
                for post in day_posts:
                    post.copy_image(output)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, colophon, f"{mname} {year}", links)
2024-05-04 03:52:19 +00:00
if __name__ == "__main__":
    # Command-line entry point: parse the options, load the optional
    # colophon text, and run the conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-a",
        "--archive",
        type=str,
        required=True,
        help="ActivityPub archive",
    )
    parser.add_argument(
        "-o",
        "--output",
        type=str,
        required=True,
        help="Output directory",
    )
    parser.add_argument(
        "-c",
        "--colophon",
        type=str,
        required=False,
        help="File with text to be included at the top of each index page",
    )
    cli = parser.parse_args()
    apub2gmi(cli.archive, cli.output, load_colophon(cli.colophon))