sand down some edge cases and issues in the segment parser

pull/4/head
Blake DeMarcy 2017-04-29 00:55:47 -05:00
parent d5af4413e9
commit 5ff2cf7fbe
1 changed files with 23 additions and 16 deletions

View File

@ -62,6 +62,7 @@ Just like the brackets themselves, backslashes may occur freely within bodies,
they are only removed when they occur before a valid expression. they are only removed when they occur before a valid expression.
""" """
from string import punctuation
import re import re
colors = [ colors = [
@ -89,6 +90,13 @@ def apply_directives(text):
return escapes.sub(lambda m: m.group(1), text) return escapes.sub(lambda m: m.group(1), text)
def linequote_p(line):
    """
    Return True when `line` should be treated as a linequote.

    A line qualifies when it begins with ">" and its first word
    (the text before the first space, or the whole line when it
    contains no space) is not matched by the module-level `quotes`
    pattern.
    """
    # partition() yields the entire line as the head when no space exists,
    # which mirrors slicing up to the first space with a whole-line fallback.
    first_word = line.partition(" ")[0]
    return line.startswith(">") and not quotes.search(first_word)
def parse_segments(text, sanitize_linequotes=True): def parse_segments(text, sanitize_linequotes=True):
""" """
Parse linequotes, quotes, and paragraphs into their appropriate Parse linequotes, quotes, and paragraphs into their appropriate
@ -98,43 +106,42 @@ def parse_segments(text, sanitize_linequotes=True):
""" """
result = list() result = list()
hard_quote = False hard_quote = False
for paragraph in [p.strip() for p in re.split("\n{2,}", text)]: for paragraph in re.split("\n{2,}", text):
pg = str() pg = str()
for segment in [s for s in paragraph.split("\n")]: for line in paragraph.split("\n"):
if not segment: if line == "```":
if hard_quote:
pg += "\n"
continue
elif segment == "```":
# because of this lazy way of handling it, # because of this lazy way of handling it,
# its not actually necessary to close a # its not actually necessary to close a
# hard quote segment. i guess thats a positive # hard quote segment. i guess thats a positive
# just because i dont have to throw syntax # just because i dont have to throw syntax
# errors at the users for it. feels dirty # errors at the users for it. feels dirty
# but its easier for all of us. # but its easier for all of us.
if hard_quote:
pg += "\n"
hard_quote = not hard_quote hard_quote = not hard_quote
continue continue
elif hard_quote: elif hard_quote:
pg += segment + "\n" pg += "\n" + line
continue continue
_fp = segment.find(" ") elif not line:
first_word = segment[:_fp] if _fp != -1 else segment continue
if segment.startswith(">") and not quotes.search(first_word):
if linequote_p(line):
if sanitize_linequotes: if sanitize_linequotes:
inner = segment.replace("]", "\\]") inner = line.replace("]", "\\]")
else: else:
inner = apply_directives(segment) inner = apply_directives(line)
pg += "[linequote: %s]" % inner.strip() pg += "[linequote: %s]" % inner.strip()
else: else:
pg += apply_directives(segment.strip()) + " " sep = "\n" if line[0] in punctuation else " "
pg += apply_directives(line.rstrip()) + sep
result.append(pg.strip()) result.append(pg.rstrip())
return result return result