bbj/src/formatting.py

"""
This module is not complete and none of its functions are currently
used elsewhere. Subject to major refactoring.
"""

test = """
This is a small paragraph
thats divided between a
few rows.

this opens a few linequotes.
>this is a few
>rows of
>sequential line breaks
and this is what follows right after
"""

# from markdown import markdown
# from html import escape
import re

colors = [
#0,   1        2        3        4       5        6
    "red", "yellow", "green", "blue", "cyan", "magenta"
]

markup = [
    "bold", "italic", "underline", "linequote", "quote", "rainbow"
]

# tokens being [red: this will be red] and [bold: this will be bold]
# tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL)

# quotes being references to other post_ids, like >>34 or >>0 for OP
quotes = re.compile(">>([0-9]+)")

# linequotes being chan-style greentext,
# >like this
linequotes = re.compile("^(>.+)$", flags=re.MULTILINE)


def parse_segments(text, sanitize_linequotes=True):
    """
    Parse linequotes, quotes, and paragraphs into their appropriate
    representations. Paragraphs are represented as separate strings
    in the returned list, and quote-types are compiled to their
    [bracketed] representations.
    """
    result = list()
    for paragraph in [p.strip() for p in re.split("\n{2,}", text)]:
        pg = str()
        for segment in [s.strip() for s in paragraph.split("\n")]:
            if not segment:
                continue
            segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment)
            if segment.startswith(">"):
                if sanitize_linequotes:
                    inner = segment.replace("]", "\\]")
                else:
                    inner = segment
                segment = "[linequote: %s]" % inner
                # pg = pg[0:-1]
                pg += segment
            else:
                pg += segment + " "
        result.append(pg.strip())
    return result


def sequential_expressions(string):
    """
    Takes a string, sexpifies it, and returns a list of lists
    who contain tuples. Each list of tuples represents a paragraph.
    Within each paragraph, [0] is either None or a markup directive,
    and [1] is the body of text to which it applies. This representation
    is very easy to handle for a client. It semi-supports nesting:
    eg, the expression [red: this [blue: is [green: mixed]]] will
    return [("red", "this "), ("blue", "is "), ("green", "mixed")],
    but this cannot effectively express an input like
    [bold: [red: bolded colors.]], in which case the innermost
    expression will take precedence. For the input:
        "[bold: [red: this] is some shit [green: it cant handle]]"
    you get:
    [('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')]
    """
    # abandon all hope ye who enter here
    directives = colors + markup
    result = list()
    for paragraph in parse_segments(string):
        stack = [[None, str()]]
        skip_iters = []
        nest = [None]
        escaped = False
        for index, char in enumerate(paragraph):
            if skip_iters:
                skip_iters.pop()
                continue

            if not escaped and char == "[":
                directive = paragraph[index+1:paragraph.find(": ", index+1)]
                open_p = directive in directives
            else: open_p = False
            clsd_p = not escaped and nest[-1] != None and char == "]"

            # dont splice other directives into linequotes: that is far
            # too confusing for the client to determine where to put line
            # breaks
            if open_p and nest[-1] != "linequote":
                stack.append([directive, str()])
                nest.append(directive)
                [skip_iters.append(x) for x in range(len(directive)+2)]

            elif clsd_p:
                nest.pop()
                stack.append([nest[-1], str()])

            else:
                escaped = char == "\\"
                if not (escaped and paragraph[index+1] in "[]"):
                    stack[-1][1] += char
        # filter out unused stacks, eg ["red", ""]
        result.append([(directive, body) for directive, body in stack if body])
    return result


def apply_formatting(msg_obj, formatter):
    """
    Receives a messages object from a thread and returns it with
    all the message bodies passed through FORMATTER. Not all
    formatting functions have to return a string. Refer to the
    documentation for each formatter.
    """
    for x, obj in enumerate(msg_obj):
        msg_obj[x]["body"] = formatter(obj["body"])
    return msg_obj


def raw(text):
    """
    Just return the message in the same state that it was submitted.
    """
    return text


def strip(text):
    """
    Returns the text with all formatting directives removed.
    Not to be confused with `raw`.
    """


def entities(text):
    """
    Returns a tuple where [0] is raw text
    """


def html(text):
    """
    Returns messages in html format, after being sent through markdown.
    Color directives are given as:
      <span color="{COLOR}" style="color: {COLOR};">content</span>

    Directives may be nested. If you don't have access to a fully featured
    and compliant html renderer in your client, you should use one of the
    simpler directives like strip, indice, or raw.
    """

    text = TOKENS.sub(map_html, escape(text))
    text = QUOTES.sub(r'<span post="\1" class="quote">\g<0></span>', text)
    return markdown(
        LINEQUOTES.sub(r'<span class="linequote">\1</span><br>', text))

# and this is the callback used by the sub statement
def map_html(match):
    directive, body = match.group(1).lower(), match.group(2)
    if directive in colors:
        return '<span color="{0}" style="color: {0};">{1}</span>'.format(directive, body)
    elif directive in markup:
        return '<{0}>{1}</{0}>'.format(directive[0], body)
    return body
documentation changes 2017-04-03 04:51:44 +00:00			`"""`
			`This module is not complete and none of its functions are currently`
			`used elsewhere. Subject to major refactoring.`
			`"""`

primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`test = """`
			`This is a small paragraph`
			`thats divided between a`
			`few rows.`

			`this opens a few linequotes.`
			`>this is a few`
			`>rows of`
			`>sequential line breaks`
			`and this is what follows right after`
			`"""`

			`# from markdown import markdown`
			`# from html import escape`
added formatting; 2017-03-01 21:54:34 +00:00			`import re`

initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`colors = [`
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`#0, 1 2 3 4 5 6`
urwid client kinda does stuff now lmao 2017-04-05 21:33:25 +00:00			`"red", "yellow", "green", "blue", "cyan", "magenta"`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`]`
removed some old references 2017-03-04 01:56:08 +00:00
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`markup = [`
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`"bold", "italic", "underline", "linequote", "quote", "rainbow"`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`]`
added formatting; 2017-03-01 21:54:34 +00:00
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`# tokens being [red: this will be red] and [bold: this will be bold]`
			`# tokens = re.compile(r"\[(%s): (.+?)]" % "\|".join(colors + markup), flags=re.DOTALL)`
added formatting; 2017-03-01 21:54:34 +00:00
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`# quotes being references to other post_ids, like >>34 or >>0 for OP`
add color options 2017-04-10 14:02:08 +00:00			`quotes = re.compile(">>([0-9]+)")`
primitive, awful text formatting 2017-04-11 20:31:01 +00:00
			`# linequotes being chan-style greentext,`
			`# >like this`
			`linequotes = re.compile("^(>.+)$", flags=re.MULTILINE)`


			`def parse_segments(text, sanitize_linequotes=True):`
			`"""`
			`Parse linequotes, quotes, and paragraphs into their appropriate`
			`representations. Paragraphs are represented as separate strings`
			`in the returned list, and quote-types are compiled to their`
			`[bracketed] representations.`
			`"""`
			`result = list()`
			`for paragraph in [p.strip() for p in re.split("\n{2,}", text)]:`
			`pg = str()`
			`for segment in [s.strip() for s in paragraph.split("\n")]:`
			`if not segment:`
			`continue`
			`segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment)`
			`if segment.startswith(">"):`
			`if sanitize_linequotes:`
			`inner = segment.replace("]", "\\]")`
			`else:`
			`inner = segment`
			`segment = "[linequote: %s]" % inner`
			`# pg = pg[0:-1]`
			`pg += segment`
			`else:`
			`pg += segment + " "`
			`result.append(pg.strip())`
			`return result`


			`def sequential_expressions(string):`
			`"""`
			`Takes a string, sexpifies it, and returns a list of lists`
			`who contain tuples. Each list of tuples represents a paragraph.`
			`Within each paragraph, [0] is either None or a markup directive,`
			`and [1] is the body of text to which it applies. This representation`
			`is very easy to handle for a client. It semi-supports nesting:`
			`eg, the expression [red: this [blue: is [green: mixed]]] will`
			`return [("red", "this "), ("blue", "is "), ("green", "mixed")],`
			`but this cannot effectively express an input like`
			`[bold: [red: bolded colors.]], in which case the innermost`
			`expression will take precedence. For the input:`
			`"[bold: [red: this] is some shit [green: it cant handle]]"`
			`you get:`
			`[('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')]`
			`"""`
			`# abandon all hope ye who enter here`
			`directives = colors + markup`
			`result = list()`
			`for paragraph in parse_segments(string):`
			`stack = [[None, str()]]`
			`skip_iters = []`
			`nest = [None]`
			`escaped = False`
			`for index, char in enumerate(paragraph):`
			`if skip_iters:`
			`skip_iters.pop()`
			`continue`

			`if not escaped and char == "[":`
			`directive = paragraph[index+1:paragraph.find(": ", index+1)]`
			`open_p = directive in directives`
			`else: open_p = False`
			`clsd_p = not escaped and nest[-1] != None and char == "]"`

			`# dont splice other directives into linequotes: that is far`
			`# too confusing for the client to determine where to put line`
			`# breaks`
			`if open_p and nest[-1] != "linequote":`
			`stack.append([directive, str()])`
			`nest.append(directive)`
			`[skip_iters.append(x) for x in range(len(directive)+2)]`

			`elif clsd_p:`
			`nest.pop()`
			`stack.append([nest[-1], str()])`

			`else:`
			`escaped = char == "\\"`
			`if not (escaped and paragraph[index+1] in "[]"):`
			`stack[-1][1] += char`
			`# filter out unused stacks, eg ["red", ""]`
			`result.append([(directive, body) for directive, body in stack if body])`
			`return result`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00

			`def apply_formatting(msg_obj, formatter):`
			`"""`
			`Receives a messages object from a thread and returns it with`
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`all the message bodies passed through FORMATTER. Not all`
			`formatting functions have to return a string. Refer to the`
			`documentation for each formatter.`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`"""`
primitive, awful text formatting 2017-04-11 20:31:01 +00:00			`for x, obj in enumerate(msg_obj):`
			`msg_obj[x]["body"] = formatter(obj["body"])`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`return msg_obj`


			`def raw(text):`
			`"""`
			`Just return the message in the same state that it was submitted.`
			`"""`
			`return text`


add color options 2017-04-10 14:02:08 +00:00			`def strip(text):`
			`"""`
			`Returns the text with all formatting directives removed.`
			Not to be confused with `raw`.
			`"""`



			`def entities(text):`
			`"""`
			`Returns a tuple where [0] is raw text`
			`"""`


initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`def html(text):`
			`"""`
			`Returns messages in html format, after being sent through markdown.`
			`Color directives are given as:`
			`<span color="{COLOR}" style="color: {COLOR};">content</span>`

			`Directives may be nested. If you don't have access to a fully featured`
			`and compliant html renderer in your client, you should use one of the`
			`simpler directives like strip, indice, or raw.`
			`"""`

			`text = TOKENS.sub(map_html, escape(text))`
			`text = QUOTES.sub(r'<span post="\1" class="quote">\g<0></span>', text)`
			`return markdown(`
			`LINEQUOTES.sub(r'<span class="linequote">\1</span><br>', text))`

			`# and this is the callback used by the sub statement`
haha i have a client now 2017-03-03 01:10:16 +00:00			`def map_html(match):`
			`directive, body = match.group(1).lower(), match.group(2)`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`if directive in colors:`
haha i have a client now 2017-03-03 01:10:16 +00:00			`return '<span color="{0}" style="color: {0};">{1}</span>'.format(directive, body)`
initial commit of non-prototype 2017-04-02 07:35:58 +00:00			`elif directive in markup:`
haha i have a client now 2017-03-03 01:10:16 +00:00			`return '<{0}>{1}</{0}>'.format(directive[0], body)`
			`return body`