bbj/src/formatting.py

183 lines
5.8 KiB
Python
Raw Normal View History

2017-04-03 04:51:44 +00:00
"""
This module is not complete and none of its functions are currently
used elsewhere. Subject to major refactoring.
"""
2017-04-11 20:31:01 +00:00
test = """
This is a small paragraph
thats divided between a
few rows.
this opens a few linequotes.
>this is a few
>rows of
>sequential line breaks
and this is what follows right after
"""
# from markdown import markdown
# from html import escape
2017-03-01 21:54:34 +00:00
import re
2017-04-02 07:35:58 +00:00
colors = [
2017-04-11 20:31:01 +00:00
#0, 1 2 3 4 5 6
2017-04-05 21:33:25 +00:00
"red", "yellow", "green", "blue", "cyan", "magenta"
2017-04-02 07:35:58 +00:00
]
2017-03-04 01:56:08 +00:00
2017-04-02 07:35:58 +00:00
markup = [
2017-04-11 20:31:01 +00:00
"bold", "italic", "underline", "linequote", "quote", "rainbow"
2017-04-02 07:35:58 +00:00
]
2017-03-01 21:54:34 +00:00
2017-04-11 20:31:01 +00:00
# tokens being [red: this will be red] and [bold: this will be bold]
# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)
2017-03-01 21:54:34 +00:00
2017-04-11 20:31:01 +00:00
# quotes being references to other post_ids, like >>34 or >>0 for OP
2017-04-10 14:02:08 +00:00
quotes = re.compile(">>([0-9]+)")
2017-04-11 20:31:01 +00:00
# linequotes being chan-style greentext,
# >like this
linequotes = re.compile("^(>.+)$", flags=re.MULTILINE)
def parse_segments(text, sanitize_linequotes=True):
"""
Parse linequotes, quotes, and paragraphs into their appropriate
representations. Paragraphs are represented as separate strings
in the returned list, and quote-types are compiled to their
[bracketed] representations.
"""
result = list()
for paragraph in [p.strip() for p in re.split("\n{2,}", text)]:
pg = str()
for segment in [s.strip() for s in paragraph.split("\n")]:
if not segment:
continue
segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment)
if segment.startswith(">"):
if sanitize_linequotes:
inner = segment.replace("]", "\\]")
else:
inner = segment
segment = "[linequote: %s]" % inner
# pg = pg[0:-1]
pg += segment
else:
pg += segment + " "
result.append(pg.strip())
return result
def sequential_expressions(string):
"""
Takes a string, sexpifies it, and returns a list of lists
who contain tuples. Each list of tuples represents a paragraph.
Within each paragraph, [0] is either None or a markup directive,
and [1] is the body of text to which it applies. This representation
is very easy to handle for a client. It semi-supports nesting:
eg, the expression [red: this [blue: is [green: mixed]]] will
return [("red", "this "), ("blue", "is "), ("green", "mixed")],
but this cannot effectively express an input like
[bold: [red: bolded colors.]], in which case the innermost
expression will take precedence. For the input:
"[bold: [red: this] is some shit [green: it cant handle]]"
you get:
[('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')]
"""
# abandon all hope ye who enter here
directives = colors + markup
result = list()
for paragraph in parse_segments(string):
stack = [[None, str()]]
skip_iters = []
nest = [None]
escaped = False
for index, char in enumerate(paragraph):
if skip_iters:
skip_iters.pop()
continue
if not escaped and char == "[":
directive = paragraph[index+1:paragraph.find(": ", index+1)]
open_p = directive in directives
else: open_p = False
clsd_p = not escaped and nest[-1] != None and char == "]"
# dont splice other directives into linequotes: that is far
# too confusing for the client to determine where to put line
# breaks
if open_p and nest[-1] != "linequote":
stack.append([directive, str()])
nest.append(directive)
[skip_iters.append(x) for x in range(len(directive)+2)]
elif clsd_p:
nest.pop()
stack.append([nest[-1], str()])
else:
escaped = char == "\\"
if not (escaped and paragraph[index+1] in "[]"):
stack[-1][1] += char
# filter out unused stacks, eg ["red", ""]
result.append([(directive, body) for directive, body in stack if body])
return result
2017-04-02 07:35:58 +00:00
def apply_formatting(msg_obj, formatter):
"""
Receives a messages object from a thread and returns it with
2017-04-11 20:31:01 +00:00
all the message bodies passed through FORMATTER. Not all
formatting functions have to return a string. Refer to the
documentation for each formatter.
2017-04-02 07:35:58 +00:00
"""
2017-04-11 20:31:01 +00:00
for x, obj in enumerate(msg_obj):
msg_obj[x]["body"] = formatter(obj["body"])
2017-04-02 07:35:58 +00:00
return msg_obj
def raw(text):
"""
Just return the message in the same state that it was submitted.
"""
return text
2017-04-10 14:02:08 +00:00
def strip(text):
"""
Returns the text with all formatting directives removed.
Not to be confused with `raw`.
"""
def entities(text):
"""
Returns a tuple where [0] is raw text
"""
2017-04-02 07:35:58 +00:00
def html(text):
"""
Returns messages in html format, after being sent through markdown.
Color directives are given as:
<span color="{COLOR}" style="color: {COLOR};">content</span>
Directives may be nested. If you don't have access to a fully featured
and compliant html renderer in your client, you should use one of the
simpler directives like strip, indice, or raw.
"""
text = TOKENS.sub(map_html, escape(text))
text = QUOTES.sub(r'<span post="\1" class="quote">\g<0></span>', text)
return markdown(
LINEQUOTES.sub(r'<span class="linequote">\1</span><br>', text))
# and this is the callback used by the sub statement
2017-03-03 01:10:16 +00:00
def map_html(match):
directive, body = match.group(1).lower(), match.group(2)
2017-04-02 07:35:58 +00:00
if directive in colors:
2017-03-03 01:10:16 +00:00
return '<span color="{0}" style="color: {0};">{1}</span>'.format(directive, body)
2017-04-02 07:35:58 +00:00
elif directive in markup:
2017-03-03 01:10:16 +00:00
return '<{0}>{1}</{0}>'.format(directive[0], body)
return body