Compare commits
	
		
			2 Commits
		
	
	
		
			18c7f756ae
			...
			60b3dc619e
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					60b3dc619e | ||
| 
						 | 
					f6d6a5135e | 
@ -2,4 +2,6 @@
 | 
			
		||||
 | 
			
		||||
This is a script which takes an archive exported from a Mastodon account, looks for media attachments and uses them to build an archive for a Gemini server.
 | 
			
		||||
 | 
			
		||||
I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/) and there are still a few things hard-coded in it which I haven't moved out to a config file, specifically the regular expressions which match attachment URLs and pull bits of text out for the index links.
 | 
			
		||||
I use it to update the [Glossatory archives](gemini://gemini.mikelynch.org/glossatory/). It assumes that every media attachment has a name with
 | 
			
		||||
two parts that you want to extract and use as the human-readable form of the
 | 
			
		||||
index.
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										53
									
								
								apub2gmi.py
									
									
									
									
									
								
							
							
						
						
									
										53
									
								
								apub2gmi.py
									
									
									
									
									
								
							@ -26,28 +26,10 @@ MNAMES = {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
HEADER = """This is the archive of GLOSSATORY, an illustrated companion to a bot which generates dictionary entries based on an RNN trained on the WordNet lexical database.
 | 
			
		||||
 | 
			
		||||
=> https://weirder.earth/@GLOSSATORY Follow the drawings on Mastodon
 | 
			
		||||
=> https://botsin.space/@GLOSSATORY Follow the words on Mastodon
 | 
			
		||||
=> https://oulipo.social/@GLOSSATORY Follow the words without the letter "e"
 | 
			
		||||
=> / Back to Mike's gemini
 | 
			
		||||
=> /glossatory/ Glossatory archive home
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
URL_RE = re.compile(r"^/weirderearth/(.*)$")
 | 
			
		||||
NAME_RES = [
 | 
			
		||||
	re.compile(r"^(.*?)\.\s*(.*)$"),
 | 
			
		||||
	re.compile(r"^(.*?)\s*(The drawing.*)$"),
 | 
			
		||||
	re.compile(r"^A line drawing depicting (.*)$"),
 | 
			
		||||
	re.compile(r"^(.*?): (.*)$"),
 | 
			
		||||
	]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class MediaPost():
 | 
			
		||||
	def __init__(self, year, month, day, file, title):
 | 
			
		||||
	def __init__(self, name_res, year, month, day, file, title):
 | 
			
		||||
		self.name_res = name_res
 | 
			
		||||
		self.year = year
 | 
			
		||||
		self.month = month
 | 
			
		||||
		self.day = day
 | 
			
		||||
@ -65,7 +47,7 @@ class MediaPost():
 | 
			
		||||
			copy(self.file, target)
 | 
			
		||||
 | 
			
		||||
	def try_parse(self):
 | 
			
		||||
		for re in NAME_RES:
 | 
			
		||||
		for re in self.name_res:
 | 
			
		||||
			if m := re.match(self.title):
 | 
			
		||||
				self.defn = m.group(1)
 | 
			
		||||
				if len(m.groups()) == 2:
 | 
			
		||||
@ -75,19 +57,19 @@ class MediaPost():
 | 
			
		||||
		self.defn = self.title
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process_post(archive, obj):
 | 
			
		||||
def process_post(cf, archive, obj):
 | 
			
		||||
	date = datetime.datetime.strptime(obj["published"][:10], "%Y-%m-%d")
 | 
			
		||||
	year = f"{date.year}"
 | 
			
		||||
	month = f"{date.month:02}"
 | 
			
		||||
	day = f"{date.day:02}"
 | 
			
		||||
	attachment = obj["attachment"][0]
 | 
			
		||||
	url = attachment["url"]
 | 
			
		||||
	if m:= URL_RE.match(url):
 | 
			
		||||
	if m:= cf["url_re"].match(url):
 | 
			
		||||
		file = Path(archive) / m.group(1)
 | 
			
		||||
	else:
 | 
			
		||||
		raise ValueError(f"Couldn't match url {url}")
 | 
			
		||||
	alt = attachment["name"]
 | 
			
		||||
	return MediaPost(year, month, day, file, alt)
 | 
			
		||||
	return MediaPost(cf["title_res"], year, month, day, file, alt)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def ensure_dir(gmdir):
 | 
			
		||||
@ -102,6 +84,15 @@ def load_colophon(cfile):
 | 
			
		||||
			return "".join(colophon)
 | 
			
		||||
	return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def load_config(config):
 | 
			
		||||
	with open(config, "r") as cfh:
 | 
			
		||||
		cf = json.load(cfh)
 | 
			
		||||
		cf["url_re"] = re.compile(cf["url_re"])
 | 
			
		||||
		cf["title_res"] = [ re.compile(r) for r in cf["title_res"] ]
 | 
			
		||||
	return cf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def write_gemfile(gmdir, colophon, title, items):
 | 
			
		||||
	ensure_dir(gmdir)
 | 
			
		||||
	gmi = gmdir / "index.gmi"
 | 
			
		||||
@ -114,14 +105,14 @@ def write_gemfile(gmdir, colophon, title, items):
 | 
			
		||||
			gfh.write(f"=> {link} {text}\n")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def apub2gmi(archive, output, colophon):
 | 
			
		||||
def apub2gmi(cf, archive, output, colophon):
 | 
			
		||||
	with open(f"{archive}/outbox.json", "r") as fh:
 | 
			
		||||
		js = json.load(fh)
 | 
			
		||||
		posts = {}
 | 
			
		||||
		for item in js["orderedItems"]:
 | 
			
		||||
			if item["type"] == "Create":
 | 
			
		||||
				try:
 | 
			
		||||
					post = process_post(archive, item["object"])
 | 
			
		||||
					post = process_post(cf, archive, item["object"])
 | 
			
		||||
					if not post.year in posts:
 | 
			
		||||
						posts[post.year] = {}
 | 
			
		||||
					if not post.month in posts[post.year]:
 | 
			
		||||
@ -169,9 +160,13 @@ if __name__ == "__main__":
 | 
			
		||||
		'-o', '--output', required=True, type=str, help="Output directory"
 | 
			
		||||
		)
 | 
			
		||||
	ap.add_argument(
 | 
			
		||||
		'-c', '--colophon', required=False, type=str,
 | 
			
		||||
		'-c', '--config', required=True, type=str, help="Config file"
 | 
			
		||||
	)
 | 
			
		||||
	ap.add_argument(
 | 
			
		||||
		'-t', '--text', required=False, type=str,
 | 
			
		||||
		help="File with text to be included at the top of each index page"
 | 
			
		||||
		)
 | 
			
		||||
	args = ap.parse_args()
 | 
			
		||||
	colophon = load_colophon(args.colophon)
 | 
			
		||||
	apub2gmi(args.archive, args.output, colophon)
 | 
			
		||||
	cf = load_config(args.config)
 | 
			
		||||
	colophon = load_colophon(args.text)
 | 
			
		||||
	apub2gmi(cf, args.archive, args.output, colophon)
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										7
									
								
								config.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								config.json
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,7 @@
 | 
			
		||||
{
 | 
			
		||||
	"url_re": "^/some_pattern/(.*)$",
 | 
			
		||||
	"title_res": [
 | 
			
		||||
		"^(.*?)\\.\\s*(.*)$",
 | 
			
		||||
		"^(.*?): (.*)$"
 | 
			
		||||
	]
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user