From b9b607b8d81bfb571475d998689f39e1ac06311c Mon Sep 17 00:00:00 2001
From: nathaniel smith <nathanielksmith@gmail.com>
Date: Thu, 31 Aug 2017 00:48:48 -0700
Subject: [PATCH] title extraction

---
 tildewiki/main.py | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/tildewiki/main.py b/tildewiki/main.py
index cf90fb6..04da65a 100644
--- a/tildewiki/main.py
+++ b/tildewiki/main.py
@@ -20,6 +20,9 @@ LOCAL_REPOSITORY_PATH = expanduser('~/wiki')
 REPOSITORY_PATH = '/wiki'
 
 DOUBLE_NEWLINE_RE = re.compile(r'\n\n', flags=re.MULTILINE|re.DOTALL)
+HEADER_TITLE_RE = re.compile(r'<h([12])>(.*?)</h\1>')
+TITLE_RE = re.compile(r'<title>.*?</title>')
+
 DEFAULT_PATH_KWARGS = dict(
     exists=True,
     writable=True,
@@ -223,11 +226,24 @@ def compile_source_file(source_file_path, header_content, footer_content):
 
     content = compiler(source_file_path)
 
-    # TODO extract title from content; will probably mean not just blindly
-    # taking a header_content
+    title = extract_title(content)
+    if title is not None:
+        header_content = re.sub(
+            TITLE_RE,
+            '<title>{}</title>'.format(title),
+            header_content)
 
     return '{}\n{}\n{}'.format(header_content, content, footer_content)
 
+def extract_title(content):
+    """Return the inner markup of an <h1>/<h2> that opens the first line of
+    content; return None when the first line does not begin with one."""
+    opening_line, _, _ = content.partition('\n')
+    found = HEADER_TITLE_RE.match(opening_line)
+    if found is None:
+        return None
+    return found.group(2)
+
 def compile_markdown(source_file_path):
     return markdown(
         slurp(source_file_path),