title extraction

master
nathaniel smith 2017-08-31 00:48:48 -07:00
parent e53a8a7180
commit b9b607b8d8
1 changed files with 18 additions and 2 deletions

View File

@@ -20,6 +20,9 @@ LOCAL_REPOSITORY_PATH = expanduser('~/wiki')
REPOSITORY_PATH = '/wiki' REPOSITORY_PATH = '/wiki'
DOUBLE_NEWLINE_RE = re.compile(r'\n\n', flags=re.MULTILINE|re.DOTALL) DOUBLE_NEWLINE_RE = re.compile(r'\n\n', flags=re.MULTILINE|re.DOTALL)
HEADER_TITLE_RE = re.compile(r'<h([12])>(.*?)</h\1>')
TITLE_RE = re.compile(r'<title>.*?</title>')
DEFAULT_PATH_KWARGS = dict( DEFAULT_PATH_KWARGS = dict(
exists=True, exists=True,
writable=True, writable=True,
@@ -223,11 +226,24 @@ def compile_source_file(source_file_path, header_content, footer_content):
content = compiler(source_file_path) content = compiler(source_file_path)
# TODO extract title from content; will probably mean not just blindly title = extract_title(content)
# taking a header_content if title is not None:
header_content = re.sub(
TITLE_RE,
'<title>{}</title>'.format(title),
header_content)
return '{}\n{}\n{}'.format(header_content, content, footer_content) return '{}\n{}\n{}'.format(header_content, content, footer_content)
def extract_title(content):
    """Return the heading text found at the start of ``content``, if any.

    Only the text before the first newline is examined. When that line
    begins with a heading element recognised by ``HEADER_TITLE_RE``, the
    text between the opening and closing tags is returned; otherwise the
    result is ``None``.
    """
    # Only the first line is a candidate for the title.
    opening_line, _, _ = content.partition('\n')
    header = HEADER_TITLE_RE.match(opening_line)
    if header is None:
        return None
    # Group 1 captures the heading level; group 2 is the heading text.
    return header.group(2)
def compile_markdown(source_file_path): def compile_markdown(source_file_path):
return markdown( return markdown(
slurp(source_file_path), slurp(source_file_path),