2024-05-04 03:33:34 +00:00
#!/usr/bin/env python
# convert the Glossatory archive from an ActivityPub collection to
# gemini
2024-05-04 03:52:19 +00:00
import argparse
2024-05-04 03:33:34 +00:00
import datetime
2024-05-04 03:52:19 +00:00
import json
2024-05-04 03:33:34 +00:00
import re
from pathlib import Path
from shutil import copy
# Maps a zero-padded month number ("01" .. "12") to its English month name.
# The keys must be exactly two digits with no surrounding whitespace: they are
# looked up with the month strings that process_post formats as
# f"{date.month:02}".
MNAMES = {
    "01": "January",
    "02": "February",
    "03": "March",
    "04": "April",
    "05": "May",
    "06": "June",
    "07": "July",
    "08": "August",
    "09": "September",
    "10": "October",
    "11": "November",
    "12": "December",
}
# Gemtext preamble that write_gemfile emits at the top of every index.gmi:
# an introductory paragraph followed by "=> URL label" link lines.
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
=> / Back to Mike's gemini
=> /glossatory/ Glossatory archive home
"""
2024-05-04 03:52:19 +00:00
# Matches attachment URLs that start with "/weirderearth/".  Group 1 is the
# remainder of the URL, which process_post joins onto the archive directory
# to locate the image file.
URL_RE = re.compile(r"^/weirderearth/(.*)$")
2024-05-04 03:33:34 +00:00
# Patterns used by GlossatoryPost.try_parse to split a post title into a
# definition (group 1) and, where the pattern has one, a description
# (group 2).  They are tried in this order and the first match wins.
NAME_RES = [
    # "<definition>. <description>" - split at the first full stop.
    re.compile(r"^(.*?)\.\s*(.*)$"),
    # "<definition> The drawing ..." - no full stop before the description.
    re.compile(r"^(.*?)\s*(The drawing.*)$"),
    # "A line drawing depicting <definition>" - single group, no description.
    re.compile(r"^A line drawing depicting (.*)$"),
    # "<definition>: <description>" - split at the first colon-space.
    re.compile(r"^(.*?): (.*)$"),
]
class GlossatoryPost:
    """A single archived post: its date, its image file and its title text.

    Attributes:
        year, month, day: Date components, used as path components of the
            output tree (``root/year/month/day``).
        file: Location of the source image.
        fname: Base name of ``file``.
        title: The title text exactly as passed in (process_post passes the
            attachment's ``name`` field).
        defn: Group 1 of the first NAME_RES pattern that matches ``title``,
            or the whole title when none matches.
        desc: Group 2 of that pattern when it has two groups, otherwise "".
    """

    def __init__(self, year, month, day, file, title):
        self.year = year
        self.month = month
        self.day = day
        self.file = file
        self.fname = Path(file).name
        self.title = title
        self.defn = ""
        self.desc = ""
        self.try_parse()

    def copy_image(self, root):
        """Copy the image to ``root/year/month/day/fname`` if not yet there.

        The destination directory is created on demand so the method does
        not depend on the caller having built the directory tree first.
        """
        target_dir = Path(root) / self.year / self.month / self.day
        target_dir.mkdir(parents=True, exist_ok=True)
        target = target_dir / self.fname
        # Never overwrite an existing copy, so re-running is cheap.
        if not target.exists():
            copy(self.file, target)

    def try_parse(self):
        """Fill ``defn`` and ``desc`` from ``title`` using NAME_RES.

        Patterns are tried in order; the first match wins.  When nothing
        matches, a message is printed and ``defn`` falls back to the full
        title.
        """
        # The loop variable is deliberately not called "re", which would
        # shadow the regex module inside this method.
        for pattern in NAME_RES:
            if m := pattern.match(self.title):
                self.defn = m.group(1)
                # Some patterns only capture a definition.
                if len(m.groups()) == 2:
                    self.desc = m.group(2)
                return
        print(f"{self.file} Couldn't match title {self.title}")
        self.defn = self.title
2024-05-04 03:52:19 +00:00
def process_post(archive, obj):
    """Build a GlossatoryPost from the object of one ActivityPub activity.

    Args:
        archive: Directory of the unpacked archive; the attachment path
            captured by URL_RE is resolved relative to it.
        obj: Mapping read for its "published" and "attachment" entries.
            Only the first attachment is used, via its "url" and "name".

    Returns:
        A GlossatoryPost whose year/month/day are strings (month and day
        zero-padded to two digits) and whose title is the attachment name.

    Raises:
        ValueError: If the post has no attachments, the attachment URL does
            not match URL_RE, or the publication date cannot be parsed.
        KeyError: If one of the expected keys is absent.
    """
    # Only the leading YYYY-MM-DD part of the timestamp is needed.
    date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
    year = f"{date.year}"
    month = f"{date.month:02}"
    day = f"{date.day:02}"

    attachments = obj["attachment"]
    if not attachments:
        # Fail with a readable message instead of a bare IndexError.
        raise ValueError("Post has no attachments")
    attachment = attachments[0]

    url = attachment["url"]
    m = URL_RE.match(url)
    if m is None:
        raise ValueError(f"Couldn't match url {url}")
    file = Path(archive) / m.group(1)

    alt = attachment["name"]
    return GlossatoryPost(year, month, day, file, alt)
def ensure_dir(gmdir):
    """Create directory ``gmdir``, including missing parents, if needed.

    Accepts a ``pathlib.Path`` or a plain string path.  Calling it for a
    directory that already exists is a no-op; ``exist_ok=True`` replaces a
    separate existence check, so there is no window between checking and
    creating.
    """
    Path(gmdir).mkdir(parents=True, exist_ok=True)
def write_gemfile(gmdir, title, items):
    """Write ``gmdir/index.gmi``: HEADER, a top-level heading, then links.

    Args:
        gmdir: Directory to write into; created if it does not exist.
        title: Text of the "# " heading.
        items: Iterable of ``(link, text)`` pairs, each written as one
            "=> link text" line.
    """
    ensure_dir(gmdir)
    gmi = Path(gmdir) / "index.gmi"
    # Write UTF-8 explicitly: labels may contain non-ASCII characters and
    # the platform default encoding is not guaranteed to handle them.
    with open(gmi, "w", encoding="utf-8") as gfh:
        gfh.write(HEADER)
        gfh.write(f"# {title}\n\n")
        for link, text in items:
            # A gemtext link must fit on a single line, so fold any line
            # breaks or runs of whitespace in the label into single spaces.
            label = " ".join(str(text).split())
            gfh.write(f"=> {link} {label}\n")
2024-05-04 03:52:19 +00:00
def _collect_posts(archive):
    """Read ``archive/outbox.json`` and group its posts by date.

    Returns a nested dict ``posts[year][month][day] -> list of posts``.
    Only items whose type is "Create" are considered.  An item that cannot
    be processed is reported on stdout and skipped.
    """
    with open(Path(archive) / "outbox.json", "r", encoding="utf-8") as fh:
        js = json.load(fh)

    posts = {}
    for item in js["orderedItems"]:
        if item.get("type") != "Create":
            continue
        try:
            post = process_post(archive, item["object"])
        except Exception as e:
            # Deliberately broad: one malformed item must not abort the
            # whole conversion.  .get() keeps the report itself from
            # failing when the item has no "id".
            ident = item.get("id")
            print(f"Processing failed: {ident}: {e}")
            continue
        by_day = posts.setdefault(post.year, {}).setdefault(post.month, {})
        by_day.setdefault(post.day, []).append(post)
    return posts


def apub2gmi(archive, output):
    """Convert an ActivityPub archive into a tree of gemtext index pages.

    Writes three levels under ``output``: a top-level index linking to each
    year, a per-year index linking to each month, and a per-month index
    linking to every image of that month.  Images are copied into
    ``output/year/month/day/``.

    Years, months and days are emitted in sorted order so the indexes are
    chronological whatever the order of items in the outbox; posts within
    one day keep their outbox order.

    Args:
        archive: Directory containing ``outbox.json`` and the media files.
        output: Directory to write the gemini tree into.
    """
    posts = _collect_posts(archive)
    outdir = Path(output)

    years = sorted(posts)
    write_gemfile(outdir, "Glossatory", [(f"{year}/", year) for year in years])

    for year in years:
        ydir = outdir / year
        months = sorted(posts[year])
        write_gemfile(
            ydir, year, [(f"{month}/", MNAMES[month]) for month in months]
        )
        for month in months:
            mname = MNAMES[month]
            mdir = ydir / month
            # Every month page starts with a link back up to its year.
            links = [
                (f"/glossatory/{year}/", year),
            ]
            for day in sorted(posts[year][month]):
                ddir = mdir / day
                ddir.mkdir(parents=True, exist_ok=True)
                for post in posts[year][month][day]:
                    post.copy_image(output)
                    links.append((f"{day}/{post.fname}", post.title))
            write_gemfile(mdir, f"{mname} {year}", links)
if __name__ == "__main__":
    # Command-line entry point; both options are mandatory.
    ap = argparse.ArgumentParser(
        description=(
            "Convert the Glossatory archive from an ActivityPub collection "
            "to gemini"
        )
    )
    ap.add_argument(
        "-a", "--archive", required=True, type=str, help="ActivityPub archive"
    )
    ap.add_argument(
        "-o", "--output", required=True, type=str, help="Output directory"
    )
    args = ap.parse_args()
    apub2gmi(args.archive, args.output)