forked from tildetown/tilde-wiki
		
	
		
			
				
	
	
		
			202 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			202 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import os
 | |
| import re
 | |
| from datetime import datetime
 | |
| from shutil import copy
 | |
| from typing import Optional, Callable
 | |
| 
 | |
| from markdown import markdown
 | |
| 
 | |
| DOUBLE_NEWLINE_RE = re.compile(r'\n\n', flags=re.MULTILINE|re.DOTALL)
 | |
| HEADER_TITLE_RE = re.compile(r'<h([12])>(.*?)</h\1>')
 | |
| TITLE_RE = re.compile(r'<title>.*?</title>')
 | |
| LINK_RE = re.compile(r'href="\/wiki')
 | |
| SRC_RE = re.compile(r'src="\/wiki')
 | |
| 
 | |
| DEFAULT_ON_CREATE = lambda _: None
 | |
| 
 | |
| def relativize_links(content:str, depth:int) -> str:
 | |
|     """Given compiled html content, change URLs that start in "/wiki" to be
 | |
|     relative instead of absolute. Depth indicates how many pairs of dots we
 | |
|     should use to traverse upward."""
 | |
|     dots = os.path.join(*['..' for _ in range(depth)])
 | |
|     href_repl = 'href="{}'.format(os.path.join(dots, 'wiki'))
 | |
|     src_repl = 'src="{}'.format(os.path.join(dots, 'wiki'))
 | |
|     out = re.sub(LINK_RE, href_repl, content)
 | |
|     return re.sub(SRC_RE, src_repl, out)
 | |
| 
 | |
| def depth_from(root: str, path: str) -> int:
 | |
|     """Given a root path and a path below it, returns how many levels below
 | |
|     the root the path is."""
 | |
|     if root == path:
 | |
|       return 1
 | |
|     relpath = os.path.relpath(path, root)
 | |
|     first = os.path.split(relpath)[0]
 | |
|     depth = 2
 | |
|     while first != '':
 | |
|       depth += 1
 | |
|       first = os.path.split(first)[0]
 | |
|     return depth
 | |
| 
 | |
| def generate_toc(header_content, articles):
 | |
|     """given header_content and a list of dicts with keys title, href, and path this function
 | |
|     generates the toc page's content"""
 | |
| 
 | |
|     toc_content = '{}\n'.format(update_title(header_content, 'table of contents'))
 | |
| 
 | |
|     toplevel_articles = [a for a in articles if a['path'] == '']
 | |
|     articles = [a for a in articles if a['path'] != '']
 | |
|     sorted(articles, key=lambda a: a['path'])
 | |
|     toc_content += '<h1>Table of Contents</h1>\n'
 | |
|     toc_content += '<h2>unsorted articles</h2>\n<ul>\n'
 | |
|     for a in toplevel_articles:
 | |
|         toc_content += '<li><a href="{}">{}</a></li>\n'.format(a['href'], a['title'])
 | |
| 
 | |
|     seen = set()
 | |
|     for article in articles:
 | |
|         if article['path'] not in seen:
 | |
|             path = article['path']
 | |
|             components = path.split('/')
 | |
|             hlvl = len(components) + 1
 | |
|             toc_content += '</ul>'
 | |
|             toc_content += f'<h{hlvl}>' + path.split('/')[-1] + f'</h{hlvl}>'
 | |
|             toc_content += '<ul>'
 | |
|             seen.add(path)
 | |
|         toc_content += '<li><a href="{href}">{title}</a></li>'.format(**article)
 | |
| 
 | |
|     toc_content += '</ul>'
 | |
|     return toc_content
 | |
| 
 | |
| def compile_wiki(source_path: str,
 | |
|                  dest_path: str,
 | |
|                  on_create: Callable[[str], None]=DEFAULT_ON_CREATE) -> None:
 | |
|     """Given a source path (presumably a git repository) and a destination
 | |
|     path, compiles the files found in {source_path}/articles and compiles them all
 | |
|     to {dest_path}/.
 | |
| 
 | |
|     THIS FUNCTION CLEARS {dest_path}/!
 | |
| 
 | |
|     Be absolutely sure you know what you are doing when you call this ^_^
 | |
| 
 | |
|     If passed, on_create will be called per directory and file created by the
 | |
|     compiler. The default is to take no action.
 | |
|     """
 | |
|     last_compiled = '<hr><p><em>last compiled: {}</em></p>'.format(datetime.utcnow())
 | |
| 
 | |
|     header_content = compile_markdown(os.path.join(source_path, 'src/header.md'))
 | |
|     footer_content = last_compiled + compile_markdown(os.path.join(source_path, 'src/footer.md'))
 | |
|     logo_path = os.path.join(source_path, 'src/logo.png')
 | |
|     css_path = os.path.join(source_path, 'src/main.css')
 | |
| 
 | |
|     articles_root = os.path.join(source_path, 'src/articles')
 | |
| 
 | |
|     articles = []
 | |
| 
 | |
|     copy(logo_path, dest_path)
 | |
|     copy(css_path, dest_path)
 | |
| 
 | |
|     for source_root, dirs, files in os.walk(articles_root):
 | |
|         depth = depth_from(articles_root, source_root)
 | |
|         current_suffix = source_root.replace(articles_root, '')
 | |
|         if current_suffix and current_suffix[0] == '/':
 | |
|             current_suffix = current_suffix[1:]
 | |
| 
 | |
|         dest_root = os.path.join(dest_path, current_suffix)
 | |
| 
 | |
|         for directory in dirs:
 | |
|             dir_path = os.path.join(dest_root, directory)
 | |
|             os.mkdir(dir_path)
 | |
|             on_create(dir_path)
 | |
| 
 | |
|         for source_filename in files:
 | |
|             if source_filename.startswith('.'):
 | |
|                 continue
 | |
|             source_file_path = os.path.join(source_root, source_filename)
 | |
|             output = compile_source_file(
 | |
|                 source_file_path,
 | |
|                 header_content,
 | |
|                 footer_content)
 | |
|             output = relativize_links(output, depth)
 | |
|             dest_filename = source_filename.split('.')[0] + '.html'
 | |
|             articles.append({
 | |
|                 'title': dest_filename.split('.')[0],
 | |
|                 'href': os.path.join(current_suffix, dest_filename),
 | |
|                 'path': current_suffix})
 | |
|             final_path = os.path.join(dest_root, dest_filename)
 | |
|             with open(final_path, 'w') as f:
 | |
|                 f.write(output)
 | |
|             on_create(final_path)
 | |
| 
 | |
| 
 | |
|     toc_content = generate_toc(header_content, articles)
 | |
|     toc_path = os.path.join(dest_path, 'toc.html')
 | |
|     with open(toc_path, 'w') as f:
 | |
|         f.write(relativize_links(toc_content, 1))
 | |
|         f.write(footer_content)
 | |
|     on_create(toc_path)
 | |
| 
 | |
| def slurp(file_path:str) -> str:
 | |
|     """Convenience function for reading a file and returning its contents."""
 | |
|     content = None
 | |
|     with open(file_path, 'r') as f:
 | |
|         content = f.read()
 | |
|     return content
 | |
| 
 | |
| def compile_source_file(source_file_path:str, header_content:str, footer_content:str) -> str:
 | |
|     """Given a path to a source file, this function:
 | |
|     - picks an appropriate compiler for the file extension
 | |
|     - compiles the file
 | |
|     - sandwiches it between the provided header and footer content
 | |
|     - returns the constructed string
 | |
|     """
 | |
|     if not os.path.isabs(source_file_path):
 | |
|         raise ValueError(
 | |
|             '{} is not an absolute path.'.format(source_file_path))
 | |
| 
 | |
|     # pick a compiler
 | |
|     if source_file_path.endswith('.md'):
 | |
|         compiler = compile_markdown
 | |
|     elif source_file_path.endswith('.txt'):
 | |
|         compiler = compile_plaintext
 | |
|     else:
 | |
|         # this just copies through any files that we don't recognize as needing
 | |
|         # conversion.
 | |
|         compiler = slurp
 | |
| 
 | |
|     content = compiler(source_file_path)
 | |
| 
 | |
|     title = extract_title(content)
 | |
|     if title is not None:
 | |
|         header_content = update_title(header_content, title)
 | |
| 
 | |
|     return '{}\n{}\n{}'.format(header_content, content, footer_content)
 | |
| 
 | |
| def update_title(content:str, title:str) -> str:
 | |
|     """Given a chunk of HTML, finds, updates, and returns the title element to
 | |
|     be the given title. If there is no title element, the content is returned
 | |
|     unmodified."""
 | |
|     return re.sub(TITLE_RE, '<title>{}</title>'.format(title), content)
 | |
| 
 | |
| def extract_title(content:str) -> Optional[str]:
 | |
|     """Given a string of page content, look for a header in the first line.
 | |
|     Returns it if found; returns None otherwise."""
 | |
|     first_line = content.split('\n')[0]
 | |
|     matches = re.match(HEADER_TITLE_RE, first_line)
 | |
|     if matches is not None:
 | |
|         return matches.groups()[1]
 | |
|     return None
 | |
| 
 | |
| def compile_markdown(file_path:str) -> str:
 | |
|     """Given a string of markdown, compiles it and returns the result."""
 | |
|     return markdown(
 | |
|         slurp(file_path),
 | |
|         output_format='html5')
 | |
| 
 | |
| def compile_plaintext(file_path:str) -> str:
 | |
|     output = '<p>\n'
 | |
|     output += re.sub(
 | |
|         DOUBLE_NEWLINE_RE,
 | |
|         '</p><p>',
 | |
|         slurp(file_path))
 | |
|     output += '\n</p>\n'
 | |
|     return output
 |