ttbp/bin/mistune.py

1155 lines
35 KiB
Python

# coding: utf-8
"""
mistune
~~~~~~~
The fastest markdown parser in pure Python with renderer feature.
:copyright: (c) 2014 - 2015 by Hsiaoming Yang.
"""
import re
import inspect
__version__ = '0.7.2'
__author__ = 'Hsiaoming Yang <me@lepture.com>'
__all__ = [
'BlockGrammar', 'BlockLexer',
'InlineGrammar', 'InlineLexer',
'Renderer', 'Markdown',
'markdown', 'escape',
]
_key_pattern = re.compile(r'\s+')
_nonalpha_pattern = re.compile(r'\W')
_escape_pattern = re.compile(r'&(?!#?\w+;)')
_newline_pattern = re.compile(r'\r\n|\r')
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
_inline_tags = [
'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
'ruby', 'rt', 'rp', 'bdi', 'bdo', 'span', 'br', 'wbr', 'ins', 'del',
'img', 'font',
]
_pre_tags = ['pre', 'script', 'style']
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
_scheme_blacklist = ('javascript', 'data', 'vbscript')
def _pure_pattern(regex):
pattern = regex.pattern
if pattern.startswith('^'):
pattern = pattern[1:]
return pattern
def _keyify(key):
return _key_pattern.sub(' ', key.lower())
def escape(text, quote=False, smart_amp=True):
"""Replace special characters "&", "<" and ">" to HTML-safe sequences.
The original cgi.escape will always escape "&", but you can control
this one for a smart escape amp.
:param quote: if set to True, " and ' will be escaped.
:param smart_amp: if set to False, & will always be escaped.
"""
if smart_amp:
text = _escape_pattern.sub('&amp;', text)
else:
text = text.replace('&', '&amp;')
text = text.replace('<', '&lt;')
text = text.replace('>', '&gt;')
if quote:
text = text.replace('"', '&quot;')
text = text.replace("'", '&#39;')
return text
def escape_link(url, **kwargs):
"""Remove dangerous URL schemes like javascript: and escape afterwards."""
if ':' in url:
scheme, _ = url.split(':', 1)
scheme = _nonalpha_pattern.sub('', scheme)
# whitelist would be better but mistune's use case is too general
if scheme.lower() in _scheme_blacklist:
return ''
# escape &entities; to &amp;entities;
kwargs['smart_amp'] = False
return escape(url, **kwargs)
def preprocessing(text, tab=4):
text = _newline_pattern.sub('\n', text)
text = text.replace('\t', ' ' * tab)
text = text.replace('\u00a0', ' ')
text = text.replace('\u2424', '\n')
pattern = re.compile(r'^ +$', re.M)
return pattern.sub('', text)
class BlockGrammar(object):
"""Grammars for block level tokens."""
def_links = re.compile(
r'^ *\[([^^\]]+)\]: *' # [key]:
r'<?([^\s>]+)>?' # <link> or link
r'(?: +["(]([^\n]+)[")])? *(?:\n+|$)'
)
def_footnotes = re.compile(
r'^\[\^([^\]]+)\]: *('
r'[^\n]*(?:\n+|$)' # [^key]:
r'(?: {1,}[^\n]*(?:\n+|$))*'
r')'
)
newline = re.compile(r'^\n+')
block_code = re.compile(r'^( {4}[^\n]+\n*)+')
fences = re.compile(
r'^ *(`{3,}|~{3,}) *(\S+)? *\n' # ```lang
r'([\s\S]+?)\s*'
r'\1 *(?:\n+|$)' # ```
)
hrule = re.compile(r'^ {0,3}[-*_](?: *[-*_]){2,} *(?:\n+|$)')
heading = re.compile(r'^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)')
lheading = re.compile(r'^([^\n]+)\n *(=|-)+ *(?:\n+|$)')
block_quote = re.compile(r'^( *>[^\n]+(\n[^\n]+)*\n*)+')
list_block = re.compile(
r'^( *)([*+-]|\d+\.) [\s\S]+?'
r'(?:'
r'\n+(?=\1?(?:[-*_] *){3,}(?:\n+|$))' # hrule
r'|\n+(?=%s)' # def links
r'|\n+(?=%s)' # def footnotes
r'|\n{2,}'
r'(?! )'
r'(?!\1(?:[*+-]|\d+\.) )\n*'
r'|'
r'\s*$)' % (
_pure_pattern(def_links),
_pure_pattern(def_footnotes),
)
)
list_item = re.compile(
r'^(( *)(?:[*+-]|\d+\.) [^\n]*'
r'(?:\n(?!\2(?:[*+-]|\d+\.) )[^\n]*)*)',
flags=re.M
)
list_bullet = re.compile(r'^ *(?:[*+-]|\d+\.) +')
paragraph = re.compile(
r'^((?:[^\n]+\n?(?!'
r'%s|%s|%s|%s|%s|%s|%s|%s|%s'
r'))+)\n*' % (
_pure_pattern(fences).replace(r'\1', r'\2'),
_pure_pattern(list_block).replace(r'\1', r'\3'),
_pure_pattern(hrule),
_pure_pattern(heading),
_pure_pattern(lheading),
_pure_pattern(block_quote),
_pure_pattern(def_links),
_pure_pattern(def_footnotes),
'<' + _block_tag,
)
)
block_html = re.compile(
r'^ *(?:%s|%s|%s) *(?:\n{2,}|\s*$)' % (
r'<!--[\s\S]*?-->',
r'<(%s)((?:%s)*?)>([\s\S]+?)<\/\1>' % (_block_tag, _valid_attr),
r'<%s(?:%s)*?>' % (_block_tag, _valid_attr),
)
)
table = re.compile(
r'^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*'
)
nptable = re.compile(
r'^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*'
)
text = re.compile(r'^[^\n]+')
class BlockLexer(object):
"""Block level lexer for block grammars."""
grammar_class = BlockGrammar
default_rules = [
'newline', 'hrule', 'block_code', 'fences', 'heading',
'nptable', 'lheading', 'block_quote',
'list_block', 'block_html', 'def_links',
'def_footnotes', 'table', 'paragraph', 'text'
]
list_rules = (
'newline', 'block_code', 'fences', 'lheading', 'hrule',
'block_quote', 'list_block', 'block_html', 'text',
)
footnote_rules = (
'newline', 'block_code', 'fences', 'heading',
'nptable', 'lheading', 'hrule', 'block_quote',
'list_block', 'block_html', 'table', 'paragraph', 'text'
)
def __init__(self, rules=None, **kwargs):
self.tokens = []
self.def_links = {}
self.def_footnotes = {}
if not rules:
rules = self.grammar_class()
self.rules = rules
def __call__(self, text, rules=None):
return self.parse(text, rules)
def parse(self, text, rules=None):
text = text.rstrip('\n')
if not rules:
rules = self.default_rules
def manipulate(text):
for key in rules:
rule = getattr(self.rules, key)
m = rule.match(text)
if not m:
continue
getattr(self, 'parse_%s' % key)(m)
return m
return False # pragma: no cover
while text:
m = manipulate(text)
if m is not False:
text = text[len(m.group(0)):]
continue
if text: # pragma: no cover
raise RuntimeError('Infinite loop at: %s' % text)
return self.tokens
def parse_newline(self, m):
length = len(m.group(0))
if length > 1:
self.tokens.append({'type': 'newline'})
def parse_block_code(self, m):
# clean leading whitespace
code = _block_code_leading_pattern.sub('', m.group(0))
self.tokens.append({
'type': 'code',
'lang': None,
'text': code,
})
def parse_fences(self, m):
self.tokens.append({
'type': 'code',
'lang': m.group(2),
'text': m.group(3),
})
def parse_heading(self, m):
self.tokens.append({
'type': 'heading',
'level': len(m.group(1)),
'text': m.group(2),
})
def parse_lheading(self, m):
"""Parse setext heading."""
self.tokens.append({
'type': 'heading',
'level': 1 if m.group(2) == '=' else 2,
'text': m.group(1),
})
def parse_hrule(self, m):
self.tokens.append({'type': 'hrule'})
def parse_list_block(self, m):
bull = m.group(2)
self.tokens.append({
'type': 'list_start',
'ordered': '.' in bull,
})
cap = m.group(0)
self._process_list_item(cap, bull)
self.tokens.append({'type': 'list_end'})
def _process_list_item(self, cap, bull):
cap = self.rules.list_item.findall(cap)
_next = False
length = len(cap)
for i in range(length):
item = cap[i][0]
# remove the bullet
space = len(item)
item = self.rules.list_bullet.sub('', item)
# outdent
if '\n ' in item:
space = space - len(item)
pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
item = pattern.sub('', item)
# determine whether item is loose or not
loose = _next
if not loose and re.search(r'\n\n(?!\s*$)', item):
loose = True
rest = len(item)
if i != length - 1 and rest:
_next = item[rest-1] == '\n'
if not loose:
loose = _next
if loose:
t = 'loose_item_start'
else:
t = 'list_item_start'
self.tokens.append({'type': t})
# recurse
self.parse(item, self.list_rules)
self.tokens.append({'type': 'list_item_end'})
def parse_block_quote(self, m):
self.tokens.append({'type': 'block_quote_start'})
# clean leading >
cap = _block_quote_leading_pattern.sub('', m.group(0))
self.parse(cap)
self.tokens.append({'type': 'block_quote_end'})
def parse_def_links(self, m):
key = _keyify(m.group(1))
self.def_links[key] = {
'link': m.group(2),
'title': m.group(3),
}
def parse_def_footnotes(self, m):
key = _keyify(m.group(1))
if key in self.def_footnotes:
# footnote is already defined
return
self.def_footnotes[key] = 0
self.tokens.append({
'type': 'footnote_start',
'key': key,
})
text = m.group(2)
if '\n' in text:
lines = text.split('\n')
whitespace = None
for line in lines[1:]:
space = len(line) - len(line.lstrip())
if space and (not whitespace or space < whitespace):
whitespace = space
newlines = [lines[0]]
for line in lines[1:]:
newlines.append(line[whitespace:])
text = '\n'.join(newlines)
self.parse(text, self.footnote_rules)
self.tokens.append({
'type': 'footnote_end',
'key': key,
})
def parse_table(self, m):
item = self._process_table(m)
cells = re.sub(r'(?: *\| *)?\n$', '', m.group(3))
cells = cells.split('\n')
for i, v in enumerate(cells):
v = re.sub(r'^ *\| *| *\| *$', '', v)
cells[i] = re.split(r' *\| *', v)
item['cells'] = cells
self.tokens.append(item)
def parse_nptable(self, m):
item = self._process_table(m)
cells = re.sub(r'\n$', '', m.group(3))
cells = cells.split('\n')
for i, v in enumerate(cells):
cells[i] = re.split(r' *\| *', v)
item['cells'] = cells
self.tokens.append(item)
def _process_table(self, m):
header = re.sub(r'^ *| *\| *$', '', m.group(1))
header = re.split(r' *\| *', header)
align = re.sub(r' *|\| *$', '', m.group(2))
align = re.split(r' *\| *', align)
for i, v in enumerate(align):
if re.search(r'^ *-+: *$', v):
align[i] = 'right'
elif re.search(r'^ *:-+: *$', v):
align[i] = 'center'
elif re.search(r'^ *:-+ *$', v):
align[i] = 'left'
else:
align[i] = None
item = {
'type': 'table',
'header': header,
'align': align,
}
return item
def parse_block_html(self, m):
tag = m.group(1)
if not tag:
text = m.group(0)
self.tokens.append({
'type': 'close_html',
'text': text
})
else:
attr = m.group(2)
text = m.group(3)
self.tokens.append({
'type': 'open_html',
'tag': tag,
'extra': attr,
'text': text
})
def parse_paragraph(self, m):
text = m.group(1).rstrip('\n')
self.tokens.append({'type': 'paragraph', 'text': text})
def parse_text(self, m):
text = m.group(0)
self.tokens.append({'type': 'text', 'text': text})
class InlineGrammar(object):
"""Grammars for inline level tokens."""
escape = re.compile(r'^\\([\\`*{}\[\]()#+\-.!_>~|])') # \* \+ \! ....
inline_html = re.compile(
r'^(?:%s|%s|%s)' % (
r'<!--[\s\S]*?-->',
r'<(\w+%s)((?:%s)*?)>([\s\S]*?)<\/\1>' % (_valid_end, _valid_attr),
r'<\w+%s(?:%s)*?>' % (_valid_end, _valid_attr),
)
)
autolink = re.compile(r'^<([^ >]+(@|:)[^ >]+)>')
link = re.compile(
r'^!?\[('
r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
r')\]\('
r'''\s*(<)?([\s\S]*?)(?(2)>)(?:\s+['"]([\s\S]*?)['"])?\s*'''
r'\)'
)
reflink = re.compile(
r'^!?\[('
r'(?:\[[^^\]]*\]|[^\[\]]|\](?=[^\[]*\]))*'
r')\]\s*\[([^^\]]*)\]'
)
nolink = re.compile(r'^!?\[((?:\[[^\]]*\]|[^\[\]])*)\]')
url = re.compile(r'''^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])''')
double_emphasis = re.compile(
r'^_{2}([\s\S]+?)_{2}(?!_)' # __word__
r'|'
r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word**
)
emphasis = re.compile(
r'^\b_((?:__|[^_])+?)_\b' # _word_
r'|'
r'^\*((?:\*\*|[^\*])+?)\*(?!\*)' # *word*
)
code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code`
linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
strikethrough = re.compile(r'^~~(?=\S)([\s\S]*?\S)~~') # ~~word~~
footnote = re.compile(r'^\[\^([^\]]+)\]')
text = re.compile(r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| {2,}\n|$)')
def hard_wrap(self):
"""Grammar for hard wrap linebreak. You don't need to add two
spaces at the end of a line.
"""
self.linebreak = re.compile(r'^ *\n(?!\s*$)')
self.text = re.compile(
r'^[\s\S]+?(?=[\\<!\[_*`~]|https?://| *\n|$)'
)
class InlineLexer(object):
"""Inline level lexer for inline grammars."""
grammar_class = InlineGrammar
default_rules = [
'escape', 'inline_html', 'autolink', 'url',
'footnote', 'link', 'reflink', 'nolink',
'double_emphasis', 'emphasis', 'code',
'linebreak', 'strikethrough', 'text',
]
inline_html_rules = [
'escape', 'autolink', 'url', 'link', 'reflink',
'nolink', 'double_emphasis', 'emphasis', 'code',
'linebreak', 'strikethrough', 'text',
]
def __init__(self, renderer, rules=None, **kwargs):
self.renderer = renderer
self.links = {}
self.footnotes = {}
self.footnote_index = 0
if not rules:
rules = self.grammar_class()
kwargs.update(self.renderer.options)
if kwargs.get('hard_wrap'):
rules.hard_wrap()
self.rules = rules
self._in_link = False
self._in_footnote = False
self._parse_inline_html = kwargs.get('parse_inline_html')
def __call__(self, text, rules=None):
return self.output(text, rules)
def setup(self, links, footnotes):
self.footnote_index = 0
self.links = links or {}
self.footnotes = footnotes or {}
def output(self, text, rules=None):
text = text.rstrip('\n')
if not rules:
rules = list(self.default_rules)
if self._in_footnote and 'footnote' in rules:
rules.remove('footnote')
output = self.renderer.placeholder()
def manipulate(text):
for key in rules:
pattern = getattr(self.rules, key)
m = pattern.match(text)
if not m:
continue
self.line_match = m
out = getattr(self, 'output_%s' % key)(m)
if out is not None:
return m, out
return False # pragma: no cover
self.line_started = False
while text:
ret = manipulate(text)
self.line_started = True
if ret is not False:
m, out = ret
output += out
text = text[len(m.group(0)):]
continue
if text: # pragma: no cover
raise RuntimeError('Infinite loop at: %s' % text)
return output
def output_escape(self, m):
return m.group(1)
def output_autolink(self, m):
link = m.group(1)
if m.group(2) == '@':
is_email = True
else:
is_email = False
return self.renderer.autolink(link, is_email)
def output_url(self, m):
link = m.group(1)
if self._in_link:
return self.renderer.text(link)
return self.renderer.autolink(link, False)
def output_inline_html(self, m):
tag = m.group(1)
if self._parse_inline_html and tag in _inline_tags:
text = m.group(3)
if tag == 'a':
self._in_link = True
text = self.output(text, rules=self.inline_html_rules)
self._in_link = False
else:
text = self.output(text, rules=self.inline_html_rules)
extra = m.group(2) or ''
html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
else:
html = m.group(0)
return self.renderer.inline_html(html)
def output_footnote(self, m):
key = _keyify(m.group(1))
if key not in self.footnotes:
return None
if self.footnotes[key]:
return None
self.footnote_index += 1
self.footnotes[key] = self.footnote_index
return self.renderer.footnote_ref(key, self.footnote_index)
def output_link(self, m):
return self._process_link(m, m.group(3), m.group(4))
def output_reflink(self, m):
key = _keyify(m.group(2) or m.group(1))
if key not in self.links:
return None
ret = self.links[key]
return self._process_link(m, ret['link'], ret['title'])
def output_nolink(self, m):
key = _keyify(m.group(1))
if key not in self.links:
return None
ret = self.links[key]
return self._process_link(m, ret['link'], ret['title'])
def _process_link(self, m, link, title=None):
line = m.group(0)
text = m.group(1)
if line[0] == '!':
return self.renderer.image(link, title, text)
self._in_link = True
text = self.output(text)
self._in_link = False
return self.renderer.link(link, title, text)
def output_double_emphasis(self, m):
text = m.group(2) or m.group(1)
text = self.output(text)
return self.renderer.double_emphasis(text)
def output_emphasis(self, m):
text = m.group(2) or m.group(1)
text = self.output(text)
return self.renderer.emphasis(text)
def output_code(self, m):
text = m.group(2)
return self.renderer.codespan(text)
def output_linebreak(self, m):
return self.renderer.linebreak()
def output_strikethrough(self, m):
text = self.output(m.group(1))
return self.renderer.strikethrough(text)
def output_text(self, m):
text = m.group(0)
return self.renderer.text(text)
class Renderer(object):
"""The default HTML renderer for rendering Markdown.
"""
def __init__(self, **kwargs):
self.options = kwargs
def placeholder(self):
"""Returns the default, empty output value for the renderer.
All renderer methods use the '+=' operator to append to this value.
Default is a string so rendering HTML can build up a result string with
the rendered Markdown.
Can be overridden by Renderer subclasses to be types like an empty
list, allowing the renderer to create a tree-like structure to
represent the document (which can then be reprocessed later into a
separate format like docx or pdf).
"""
return ''
def block_code(self, code, lang=None):
"""Rendering block level code. ``pre > code``.
:param code: text content of the code block.
:param lang: language of the given code.
"""
code = code.rstrip('\n')
if not lang:
code = escape(code, smart_amp=False)
return '<pre><code>%s\n</code></pre>\n' % code
code = escape(code, quote=True, smart_amp=False)
return '<pre><code class="lang-%s">%s\n</code></pre>\n' % (lang, code)
def block_quote(self, text):
"""Rendering <blockquote> with the given text.
:param text: text content of the blockquote.
"""
return '<blockquote>%s\n</blockquote>\n' % text.rstrip('\n')
def block_html(self, html):
"""Rendering block level pure html content.
:param html: text content of the html snippet.
"""
if self.options.get('skip_style') and \
html.lower().startswith('<style'):
return ''
if self.options.get('escape'):
return escape(html)
return html
def header(self, text, level, raw=None):
"""Rendering header/heading tags like ``<h1>`` ``<h2>``.
:param text: rendered text content for the header.
:param level: a number for the header level, for example: 1.
:param raw: raw text content of the header.
"""
return '<h%d>%s</h%d>\n' % (level, text, level)
def hrule(self):
"""Rendering method for ``<hr>`` tag."""
if self.options.get('use_xhtml'):
return '<hr />\n'
return '<hr>\n'
def list(self, body, ordered=True):
"""Rendering list tags like ``<ul>`` and ``<ol>``.
:param body: body contents of the list.
:param ordered: whether this list is ordered or not.
"""
tag = 'ul'
if ordered:
tag = 'ol'
return '<%s>\n%s</%s>\n' % (tag, body, tag)
def list_item(self, text):
"""Rendering list item snippet. Like ``<li>``."""
return '<li>%s</li>\n' % text
def paragraph(self, text):
"""Rendering paragraph tags. Like ``<p>``."""
return '<p>%s</p>\n' % text.strip(' ')
def table(self, header, body):
"""Rendering table element. Wrap header and body in it.
:param header: header part of the table.
:param body: body part of the table.
"""
return (
'<table>\n<thead>%s</thead>\n'
'<tbody>\n%s</tbody>\n</table>\n'
) % (header, body)
def table_row(self, content):
"""Rendering a table row. Like ``<tr>``.
:param content: content of current table row.
"""
return '<tr>\n%s</tr>\n' % content
def table_cell(self, content, **flags):
"""Rendering a table cell. Like ``<th>`` ``<td>``.
:param content: content of current table cell.
:param header: whether this is header or not.
:param align: align of current table cell.
"""
if flags['header']:
tag = 'th'
else:
tag = 'td'
align = flags['align']
if not align:
return '<%s>%s</%s>\n' % (tag, content, tag)
return '<%s style="text-align:%s">%s</%s>\n' % (
tag, align, content, tag
)
def double_emphasis(self, text):
"""Rendering **strong** text.
:param text: text content for emphasis.
"""
return '<strong>%s</strong>' % text
def emphasis(self, text):
"""Rendering *emphasis* text.
:param text: text content for emphasis.
"""
return '<em>%s</em>' % text
def codespan(self, text):
"""Rendering inline `code` text.
:param text: text content for inline code.
"""
text = escape(text.rstrip(), smart_amp=False)
return '<code>%s</code>' % text
def linebreak(self):
"""Rendering line break like ``<br>``."""
if self.options.get('use_xhtml'):
return '<br />\n'
return '<br>\n'
def strikethrough(self, text):
"""Rendering ~~strikethrough~~ text.
:param text: text content for strikethrough.
"""
return '<del>%s</del>' % text
def text(self, text):
"""Rendering unformatted text.
:param text: text content.
"""
return escape(text)
def autolink(self, link, is_email=False):
"""Rendering a given link or email address.
:param link: link content or email address.
:param is_email: whether this is an email or not.
"""
text = link = escape(link)
if is_email:
link = 'mailto:%s' % link
return '<a href="%s">%s</a>' % (link, text)
def link(self, link, title, text):
"""Rendering a given link with content and title.
:param link: href link for ``<a>`` tag.
:param title: title content for `title` attribute.
:param text: text content for description.
"""
link = escape_link(link, quote=True)
if not title:
return '<a href="%s">%s</a>' % (link, text)
title = escape(title, quote=True)
return '<a href="%s" title="%s">%s</a>' % (link, title, text)
def image(self, src, title, text):
"""Rendering a image with title and text.
:param src: source link of the image.
:param title: title text of the image.
:param text: alt text of the image.
"""
src = escape_link(src, quote=True)
text = escape(text, quote=True)
if title:
title = escape(title, quote=True)
html = '<img src="%s" alt="%s" title="%s"' % (src, text, title)
else:
html = '<img src="%s" alt="%s"' % (src, text)
if self.options.get('use_xhtml'):
return '%s />' % html
return '%s>' % html
def inline_html(self, html):
"""Rendering span level pure html content.
:param html: text content of the html snippet.
"""
if self.options.get('escape'):
return escape(html)
return html
def newline(self):
"""Rendering newline element."""
return ''
def footnote_ref(self, key, index):
"""Rendering the ref anchor of a footnote.
:param key: identity key for the footnote.
:param index: the index count of current footnote.
"""
html = (
'<sup class="footnote-ref" id="fnref-%s">'
'<a href="#fn-%s" rel="footnote">%d</a></sup>'
) % (escape(key), escape(key), index)
return html
def footnote_item(self, key, text):
"""Rendering a footnote item.
:param key: identity key for the footnote.
:param text: text content of the footnote.
"""
back = (
'<a href="#fnref-%s" rev="footnote">&#8617;</a>'
) % escape(key)
text = text.rstrip()
if text.endswith('</p>'):
text = re.sub(r'<\/p>$', r'%s</p>' % back, text)
else:
text = '%s<p>%s</p>' % (text, back)
html = '<li id="fn-%s">%s</li>\n' % (escape(key), text)
return html
def footnotes(self, text):
"""Wrapper for all footnotes.
:param text: contents of all footnotes.
"""
html = '<div class="footnotes">\n%s<ol>%s</ol>\n</div>\n'
return html % (self.hrule(), text)
class Markdown(object):
"""The Markdown parser.
:param renderer: An instance of ``Renderer``.
:param inline: An inline lexer class or instance.
:param block: A block lexer class or instance.
"""
def __init__(self, renderer=None, inline=None, block=None, **kwargs):
if not renderer:
renderer = Renderer(**kwargs)
else:
kwargs.update(renderer.options)
self.renderer = renderer
if inline and inspect.isclass(inline):
inline = inline(renderer, **kwargs)
if block and inspect.isclass(block):
block = block(**kwargs)
if inline:
self.inline = inline
else:
self.inline = InlineLexer(renderer, **kwargs)
self.block = block or BlockLexer(BlockGrammar())
self.footnotes = []
self.tokens = []
# detect if it should parse text in block html
self._parse_block_html = kwargs.get('parse_block_html')
def __call__(self, text):
return self.parse(text)
def render(self, text):
"""Render the Markdown text.
:param text: markdown formatted text content.
"""
return self.parse(text)
def parse(self, text):
out = self.output(preprocessing(text))
keys = self.block.def_footnotes
# reset block
self.block.def_links = {}
self.block.def_footnotes = {}
# reset inline
self.inline.links = {}
self.inline.footnotes = {}
if not self.footnotes:
return out
footnotes = filter(lambda o: keys.get(o['key']), self.footnotes)
self.footnotes = sorted(
footnotes, key=lambda o: keys.get(o['key']), reverse=True
)
body = self.renderer.placeholder()
while self.footnotes:
note = self.footnotes.pop()
body += self.renderer.footnote_item(
note['key'], note['text']
)
out += self.renderer.footnotes(body)
return out
def pop(self):
if not self.tokens:
return None
self.token = self.tokens.pop()
return self.token
def peek(self):
if self.tokens:
return self.tokens[-1]
return None # pragma: no cover
def output(self, text, rules=None):
self.tokens = self.block(text, rules)
self.tokens.reverse()
self.inline.setup(self.block.def_links, self.block.def_footnotes)
out = self.renderer.placeholder()
while self.pop():
out += self.tok()
return out
def tok(self):
t = self.token['type']
# sepcial cases
if t.endswith('_start'):
t = t[:-6]
return getattr(self, 'output_%s' % t)()
def tok_text(self):
text = self.token['text']
while self.peek()['type'] == 'text':
text += '\n' + self.pop()['text']
return self.inline(text)
def output_newline(self):
return self.renderer.newline()
def output_hrule(self):
return self.renderer.hrule()
def output_heading(self):
return self.renderer.header(
self.inline(self.token['text']),
self.token['level'],
self.token['text'],
)
def output_code(self):
return self.renderer.block_code(
self.token['text'], self.token['lang']
)
def output_table(self):
aligns = self.token['align']
aligns_length = len(aligns)
cell = self.renderer.placeholder()
# header part
header = self.renderer.placeholder()
for i, value in enumerate(self.token['header']):
align = aligns[i] if i < aligns_length else None
flags = {'header': True, 'align': align}
cell += self.renderer.table_cell(self.inline(value), **flags)
header += self.renderer.table_row(cell)
# body part
body = self.renderer.placeholder()
for i, row in enumerate(self.token['cells']):
cell = self.renderer.placeholder()
for j, value in enumerate(row):
align = aligns[j] if j < aligns_length else None
flags = {'header': False, 'align': align}
cell += self.renderer.table_cell(self.inline(value), **flags)
body += self.renderer.table_row(cell)
return self.renderer.table(header, body)
def output_block_quote(self):
body = self.renderer.placeholder()
while self.pop()['type'] != 'block_quote_end':
body += self.tok()
return self.renderer.block_quote(body)
def output_list(self):
ordered = self.token['ordered']
body = self.renderer.placeholder()
while self.pop()['type'] != 'list_end':
body += self.tok()
return self.renderer.list(body, ordered)
def output_list_item(self):
body = self.renderer.placeholder()
while self.pop()['type'] != 'list_item_end':
if self.token['type'] == 'text':
body += self.tok_text()
else:
body += self.tok()
return self.renderer.list_item(body)
def output_loose_item(self):
body = self.renderer.placeholder()
while self.pop()['type'] != 'list_item_end':
body += self.tok()
return self.renderer.list_item(body)
def output_footnote(self):
self.inline._in_footnote = True
body = self.renderer.placeholder()
key = self.token['key']
while self.pop()['type'] != 'footnote_end':
body += self.tok()
self.footnotes.append({'key': key, 'text': body})
self.inline._in_footnote = False
return self.renderer.placeholder()
def output_close_html(self):
text = self.token['text']
return self.renderer.block_html(text)
def output_open_html(self):
text = self.token['text']
tag = self.token['tag']
if self._parse_block_html and tag not in _pre_tags:
text = self.inline(text, rules=self.inline.inline_html_rules)
extra = self.token.get('extra') or ''
html = '<%s%s>%s</%s>' % (tag, extra, text, tag)
return self.renderer.block_html(html)
def output_paragraph(self):
return self.renderer.paragraph(self.inline(self.token['text']))
def output_text(self):
return self.renderer.paragraph(self.tok_text())
def markdown(text, escape=True, **kwargs):
"""Render markdown formatted text to html.
:param text: markdown formatted text content.
:param escape: if set to False, all html tags will not be escaped.
:param use_xhtml: output with xhtml tags.
:param hard_wrap: if set to True, it will use the GFM line breaks feature.
:param parse_block_html: parse text only in block level html.
:param parse_inline_html: parse text only in inline level html.
"""
return Markdown(escape=escape, **kwargs)(text)