2017-04-03 04:51:44 +00:00
|
|
|
"""
|
|
|
|
This module is not complete and none of its functions are currently
|
|
|
|
used elsewhere. Subject to major refactoring.
|
|
|
|
"""
|
|
|
|
|
2017-04-11 20:31:01 +00:00
|
|
|
# Sample post body for manual experiments: a paragraph wrapped over three
# rows, a blank line, then a paragraph holding three consecutive linequotes
# followed by one ordinary row.
test = (
    "\n"
    "This is a small paragraph\n"
    "thats divided between a\n"
    "few rows.\n"
    "\n"
    "this opens a few linequotes.\n"
    ">this is a few\n"
    ">rows of\n"
    ">sequential line breaks\n"
    "and this is what follows right after\n"
)
|
|
|
|
|
|
|
|
# from markdown import markdown
|
|
|
|
# from html import escape
|
2017-03-01 21:54:34 +00:00
|
|
|
import re
|
|
|
|
|
2017-04-02 07:35:58 +00:00
|
|
|
# Names accepted as color directives. The trailing comment on each entry
# is its index in the list.
colors = [
    "red",      # 0
    "yellow",   # 1
    "green",    # 2
    "blue",     # 3
    "cyan",     # 4
    "magenta",  # 5
]
|
2017-03-04 01:56:08 +00:00
|
|
|
|
2017-04-02 07:35:58 +00:00
|
|
|
# Names accepted as non-color markup directives.
markup = [
    "bold",
    "italic",
    "underline",
    "linequote",
    "quote",
    "rainbow",
]
|
2017-03-01 21:54:34 +00:00
|
|
|
|
2017-04-11 20:31:01 +00:00
|
|
|
# tokens being [red: this will be red] and [bold: this will be bold]
|
|
|
|
# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)
|
2017-03-01 21:54:34 +00:00
|
|
|
|
2017-04-11 20:31:01 +00:00
|
|
|
# A quote is a reference to another post by its post_id, written as two
# ">" characters followed by digits: >>34, or >>0 for the OP. The digits
# are captured as group 1.
quotes = re.compile(pattern=">>([0-9]+)")
|
2017-04-11 20:31:01 +00:00
|
|
|
|
|
|
|
# A linequote is chan-style greentext: a whole line that starts with ">"
# and has at least one more character after it,
# >like this
# MULTILINE makes ^ and $ anchor at every line of the input; the full line
# is captured as group 1.
linequotes = re.compile(pattern="^(>.+)$", flags=re.MULTILINE)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_segments(text, sanitize_linequotes=True):
    """
    Split TEXT into paragraphs and rewrite quotes and linequotes into
    their [bracketed] directive form.

    Paragraphs are separated by two or more consecutive newlines. Each
    paragraph becomes one string in the returned list: its rows are
    stripped, empty rows are dropped, and ordinary rows are joined with
    single spaces. Every >>N reference is rewritten to [quote: N]. A row
    that still starts with ">" afterwards is wrapped as [linequote: row]
    and is not followed by a space.

    When SANITIZE_LINEQUOTES is true, every "]" inside a linequote row
    is backslash-escaped before wrapping.
    """
    paragraphs = []
    for block in re.split("\n{2,}", text):
        pieces = []
        for raw_row in block.strip().split("\n"):
            row = raw_row.strip()
            if not row:
                continue

            row = quotes.sub(lambda m: "[quote: %s]" % m.group(1), row)
            if not row.startswith(">"):
                # ordinary rows of one paragraph run together, space separated
                pieces.append(row + " ")
                continue

            quoted = row.replace("]", "\\]") if sanitize_linequotes else row
            pieces.append("[linequote: %s]" % quoted)

        # strip drops the space left behind by a final ordinary row
        paragraphs.append("".join(pieces).strip())
    return paragraphs
|
|
|
|
|
|
|
|
|
|
|
|
def sequential_expressions(string):
    """
    Takes a string, passes it through parse_segments, and returns a
    list of lists of tuples. Each inner list represents one paragraph.
    Within each tuple, [0] is either None or a markup directive, and
    [1] is the body of text to which it applies. Tuples with an empty
    body are left out.

    Nesting is only semi-supported, because the result is flat: the
    expression [red: this [blue: is [green: mixed]]] returns
    [("red", "this "), ("blue", "is "), ("green", "mixed")],
    but an input like [bold: [red: bolded colors.]] cannot be
    expressed; the innermost directive takes precedence. For the input:
      "[bold: [red: this] is some text [green: it cant handle]]"
    you get:
      [('red', 'this'), ('bold', ' is some text '), ('green', 'it cant handle')]

    A backslash directly before "[" or "]" is dropped and makes that
    bracket literal. Any other backslash, including one at the very end
    of a paragraph, is kept as ordinary text.
    """
    directives = colors + markup
    result = []
    for paragraph in parse_segments(string):
        # stack holds the output chunks in order as [directive, body] pairs
        stack = [[None, ""]]
        # nest holds the currently open directives, innermost last; the
        # None at the bottom stands for "no directive"
        nest = [None]
        # how many upcoming characters belong to a "[directive: " header
        # that was just consumed and must not reach any body
        pending_skips = 0
        escaped = False
        for index, char in enumerate(paragraph):
            if pending_skips:
                pending_skips -= 1
                continue

            directive = None
            open_p = False
            if not escaped and char == "[":
                # text between "[" and the next ": " is a directive
                # candidate; it only opens a chunk when it is a known name
                directive = paragraph[index + 1:paragraph.find(": ", index + 1)]
                open_p = directive in directives
            clsd_p = not escaped and nest[-1] is not None and char == "]"

            # dont splice other directives into linequotes: that is far
            # too confusing for the client to determine where to put line
            # breaks
            if open_p and nest[-1] != "linequote":
                stack.append([directive, ""])
                nest.append(directive)
                # skip the directive name plus the ": " that follows it
                pending_skips = len(directive) + 2

            elif clsd_p:
                # closing a directive resumes the enclosing one (or None)
                nest.pop()
                stack.append([nest[-1], ""])

            else:
                escaped = char == "\\"
                # Slicing yields "" at the end of the paragraph instead of
                # raising IndexError; the tuple test matters because
                # `"" in "[]"` would be True.
                following = paragraph[index + 1:index + 2]
                if not (escaped and following in ("[", "]")):
                    stack[-1][1] += char

        # filter out unused stacks, eg ["red", ""]
        result.append([(name, body) for name, body in stack if body])
    return result
|
2017-04-02 07:35:58 +00:00
|
|
|
|
|
|
|
|
|
|
|
def apply_formatting(msg_obj, formatter):
    """
    Run the "body" of every message in MSG_OBJ through FORMATTER.

    Each message's "body" value is replaced, in place, with whatever
    FORMATTER returns for it, and the same MSG_OBJ is returned. The
    replacement is not necessarily a string: that depends on the
    formatter, so refer to the documentation of the one being used.
    """
    for message in msg_obj:
        message["body"] = formatter(message["body"])
    return msg_obj
|
|
|
|
|
|
|
|
|
|
|
|
def raw(text):
    """
    Identity formatter: hand TEXT back exactly as it was submitted,
    with no parsing or modification.
    """
    return text
|
|
|
|
|
|
|
|
|
2017-04-10 14:02:08 +00:00
|
|
|
def strip(text):
    """
    Intended to return TEXT with all formatting directives removed.
    Not to be confused with `raw`.

    Not implemented yet: the function has no body, so it currently
    returns None for every input.
    """
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def entities(text):
    """
    Intended to return a tuple whose item [0] is the raw text; the
    rest of the tuple has not been specified.

    Not implemented yet: the function has no body, so it currently
    returns None for every input.
    """
    return None
|
|
|
|
|
|
|
|
|
2017-04-02 07:35:58 +00:00
|
|
|
def html(text):
    """
    Returns TEXT in html format, after being sent through markdown.

    TEXT is html-escaped first. Then every [directive: body] token is
    replaced by map_html, where color directives are given as:
      <span color="{COLOR}" style="color: {COLOR};">content</span>

    Post references (>>N) become:
      <span post="N" class="quote">&gt;&gt;N</span>
    and each line starting with ">" becomes:
      <span class="linequote">line</span><br>

    If you don't have access to a fully featured and compliant html
    renderer in your client, you should use one of the simpler
    formatters in this module, such as strip or raw.
    """
    # Imported here because the module-level import of `escape` is
    # commented out; this is the standard library's html.escape.
    from html import escape

    # Same pattern as the commented-out module-level `tokens` definition.
    token_pattern = re.compile(
        r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)
    # escape() rewrites ">" as "&gt;", so after escaping the quote and
    # linequote markers have to be matched in their escaped form.
    quote_pattern = re.compile(r"&gt;&gt;([0-9]+)")
    linequote_pattern = re.compile(r"^(&gt;.+)$", flags=re.MULTILINE)

    text = token_pattern.sub(map_html, escape(text))
    text = quote_pattern.sub(r'<span post="\1" class="quote">\g<0></span>', text)
    # NOTE(review): `markdown` is not defined in this module, because the
    # `from markdown import markdown` line at the top is commented out.
    # Restore that import (a third-party package) before calling this.
    return markdown(
        linequote_pattern.sub(r'<span class="linequote">\1</span><br>', text))
|
|
|
|
|
|
|
|
# and this is the callback used by the sub statement
|
2017-03-03 01:10:16 +00:00
|
|
|
def map_html(match):
    """
    Replacement callback for a regex substitution over directive tokens.

    MATCH must expose the directive name as group 1 and its body as
    group 2. The name is lowercased before lookup. A color directive
    yields a span carrying that color, a markup directive yields a tag
    named after the directive's first letter, and anything else yields
    the body unchanged.
    """
    name = match.group(1).lower()
    content = match.group(2)
    if name in colors:
        return '<span color="{0}" style="color: {0};">{1}</span>'.format(name, content)
    if name in markup:
        return '<{0}>{1}</{0}>'.format(name[0], content)
    return content
|