""" A B A N D O N ,: A L L H O P E ,' | / : --' / F O R Y E W H O \/ /:/ E N T E R H E R E / ://_\ __/ / )'-. / Crude hacks lie beneath us. ./ :\ /.' ' This module includes a couple '/' of custom (GROAN) formatting + specifications and parsers ' me irl for them. Why did i do this? `. I have no idea! .-"- ( | . .-' '. ( (. )8: .' / (_ ) _. :(. )8P ` . ( `-' ( `. . . : ( .a8a) /_`( "a `a. )"' ( (/ . ' )==' ( ( ) .8" + (`'8a.( _( ( ..-. `8P ) ` ) + -' ( -ab: ) ' _ ` (8P"Ya _( ( )b -`. ) + ( 8) ( _.aP" _a \( \ * + )/ (8P (88 ) ) (a:f " `"` The internal representation of formatted text is much like an s-expression. They are specified as follows: [directive: this is the body of text to apply it to] The colon and the space following are important! The first space is not part of the body, but any trailing spaces after it or at the end of the body are included in the output. Escaping via backslash is supported. Nesting is supported as well, but escaping the delimiters is a bit tricky when nesting (both ends need to be escaped). See the following examples: [bold: this here \] is totally valid, and so is [<-TOTALLY OK this] [bold: \[red: but both]<-CHOKE delimiters within a nest must be escaped.] Directives are only parsed whenever the directive name is defined, and the colon/space follow it. Thus, including [brackets like this] in a post body will NOT require you to escape it! Even [brackets: like this] is safe, because brackets is not a defined formatting parameter. So, any amount of unescaped brackets may exist within the body unless they mimic a directive. To escape a valid directive, escaping only the opening is suffiecient: \[bold: like this]. The literal body of text outputted by that will be [bold: like this], with the backslash removed. Just like the brackets themselves, backslashes may occur freely within bodies, they are only removed when they occur before a valid expression. """ import re colors = [ #0, 1 2 3 4 5 6 "red", "yellow", "green", "blue", "cyan", "magenta" ] markup = [ "bold", "italic", "underline", "linequote", "quote", "rainbow" ] # PS: regex parsing is no longer used for these, preserving anyways # tokens being [red: this will be red] and [bold: this will be bold] # tokens = re.compile(r"\[(%s): (.+?)]" % "|".join(colors + markup), flags=re.DOTALL) # linequotes being chan-style greentext, # >like this # linequotes = re.compile("^(>.+)$", flags=re.MULTILINE) # quotes being references to other post_ids, like >>34 or >>0 for OP quotes = re.compile(">>([0-9]+)") def parse_segments(text, sanitize_linequotes=True): """ Parse linequotes, quotes, and paragraphs into their appropriate representations. Paragraphs are represented as separate strings in the returned list, and quote-types are compiled to their [bracketed] representations. """ result = list() for paragraph in [p.strip() for p in re.split("\n{2,}", text)]: pg = str() for segment in [s.strip() for s in paragraph.split("\n")]: if not segment: continue segment = quotes.sub(lambda m: "[quote: %s]" % m.group(1), segment) if segment.startswith(">"): if sanitize_linequotes: inner = segment.replace("]", "\\]") else: inner = segment segment = "[linequote: %s]" % inner # pg = pg[0:-1] pg += segment else: pg += segment + " " result.append(pg.strip()) return result def sequential_expressions(string): """ Takes a string, sexpifies it, and returns a list of lists who contain tuples. Each list of tuples represents a paragraph. Within each paragraph, [0] is either None or a markup directive, and [1] is the body of text to which it applies. This representation is very easy to handle for a client. It semi-supports nesting: eg, the expression [red: this [blue: is [green: mixed]]] will return [("red", "this "), ("blue", "is "), ("green", "mixed")], but this cannot effectively express an input like [bold: [red: bolded colors.]], in which case the innermost expression will take precedence. For the input: "[bold: [red: this] is some shit [green: it cant handle]]" you get: [('red', 'this'), ('bold', ' is some shit '), ('green', 'it cant handle')] """ # abandon all hope ye who enter here directives = colors + markup result = list() for paragraph in parse_segments(string): stack = [[None, str()]] skip_iters = [] nest = [None] escaped = False for index, char in enumerate(paragraph): if skip_iters: skip_iters.pop() continue if not escaped and char == "[": directive = paragraph[index+1:paragraph.find(": ", index+1)] open_p = directive in directives else: open_p = False clsd_p = not escaped and nest[-1] != None and char == "]" # dont splice other directives into linequotes: that is far # too confusing for the client to determine where to put line # breaks if open_p and nest[-1] != "linequote": stack.append([directive, str()]) nest.append(directive) [skip_iters.append(x) for x in range(len(directive)+2)] elif clsd_p: nest.pop() stack.append([nest[-1], str()]) else: escaped = char == "\\" if not (escaped and paragraph[index+1] in "[]"): stack[-1][1] += char # filter out unused stacks, eg ["red", ""] result.append([(directive, body) for directive, body in stack if body]) return result def apply_formatting(msg_obj, formatter): """ Receives a messages object from a thread and returns it with all the message bodies passed through FORMATTER. Not all formatting functions have to return a string. Refer to the documentation for each formatter. """ for x, obj in enumerate(msg_obj): msg_obj[x]["body"] = formatter(obj["body"]) return msg_obj def raw(text): """ Just return the message in the same state that it was submitted. """ return text def strip(text): """ Returns the text with all formatting directives removed. Not to be confused with `raw`. """ def entities(text): """ Returns a tuple where [0] is raw text """ def html(text): """ Returns messages in html format, after being sent through markdown. Color directives are given as: content Directives may be nested. If you don't have access to a fully featured and compliant html renderer in your client, you should use one of the simpler directives like strip, indice, or raw. """ text = TOKENS.sub(map_html, escape(text)) text = QUOTES.sub(r'\g<0>', text) return markdown( LINEQUOTES.sub(r'\1
', text)) # and this is the callback used by the sub statement def map_html(match): directive, body = match.group(1).lower(), match.group(2) if directive in colors: return '{1}'.format(directive, body) elif directive in markup: return '<{0}>{1}'.format(directive[0], body) return body