#!/usr/bin/python
"""
Reference parser for Extremely Simplified Enamel (ESE).

Tags look like ``{name payload|content}``. Reads text on stdin when run as
a script and prints the parsed result.
"""

import sys

# Chars are redefinable within sane limits.
OPEN_CHAR = '{'
CLOSE_CHAR = '}'
BAR_CHAR = '|'


def null_op(data):
    """
    Convenience function and handler for tags with null names.

    Also used as the fallback handler for tag names that are not present
    in the 'tags' dictionary. Simply returns the content.
    """
    return data["content"]


def tag_image(data):
    """
    Example implementation of the 'image' tag.

    Returns a placeholder suggesting loading the image from a URL. Nested
    'height' and 'width' tags, if present, contribute their payload as
    attributes.

    Side effect: stores the helper keys 'heighttext' and 'widthtext' in
    the data dictionary.
    """
    for attribute in ("height", "width"):
        key = attribute + "text"
        if attribute in data["tags"]:
            data[key] = ' %s="%s"' % (
                attribute, data["tags"][attribute]["payload"])
        else:
            data[key] = ""
    return ("<inline image%(heighttext)s%(widthtext)s here from %(content)s>"
            % data)


def tag_exit(data):
    """
    Example implementation of the 'exit' tag.

    Returns a notional 'clicky' tag to represent a possible action that
    would happen when clicking on the text. Expects the 'alternative'
    member that process_tag adds to the data dictionary.
    """
    return '<clicky action="go %(alternative)s">%(content)s</clicky>' % data


# Dictionary of tag names and either a string to replace them with, or a
# callable to invoke.
tags = {
    "": null_op,
    "oc": OPEN_CHAR,
    "cc": CLOSE_CHAR,
    "bc": BAR_CHAR,
    "image": tag_image,
    "exit": tag_exit,
}


def process_tag(data):
    """
    This function handles tag name lookup from the 'tags' dictionary.

    If the result is a callable, it is called with the data as argument.
    If the result is a string, it is returned immediately. Unknown tag
    names fall back to null_op, i.e. the tag's content is returned.

    In addition, this function adds a member 'alternative' to the data
    dictionary. This is always the payload, unless the payload is empty,
    in which case it's the content.
    """
    if data["payload"]:
        data["alternative"] = data["payload"]
    else:
        data["alternative"] = data["content"]

    action = tags.get(data["name"], null_op)
    if callable(action):
        return action(data)
    return action


def parse_ese(original, index=0, mode="content", parent=None):
    """
    This is the actual parser, the workhorse for parsing Extremely
    Simplified Enamel.

    The parser will run through a body of text, recursing any time an open
    tag is encountered and returning when a close tag is encountered, or
    when the end of the text is reached.

    The body of text is passed in as the 'original' argument, and is passed
    down on every recursion. It is never modified.

    The current position in the text is represented by the 'index'
    argument, and is likewise passed down when recursing. It is also
    returned as the second return value to indicate where the parent
    should resume parsing.

    The 'parent' argument is the 'tags' dictionary of the enclosing tag (a
    fresh dictionary when omitted). Before returning, the parser stores
    its own data dictionary in it under the tag's name, which is how a
    handler such as tag_image can see its nested tags.

    There are three states the parser can be in: name, payload or content.
    Content is either the top level text outside of any tag, or the
    content of a specific tag. Name is the name of the tag. Payload is the
    additional data associated with the text, such as a dbref for an exit.

    At the top level the state will generally be content (the default
    value). On recursion the state is always name. State transitions
    happen when a character signifies the end of our current state, or
    implicitly when the function returns.

    The 'data' variable holds a dictionary with name, payload and content
    elements, which accumulate their respective data before being passed
    to process_tag. Not represented in this function, a fourth element
    'alternative' is added during the process_tag call. (This may be
    important to keep in mind when implementing a parser in a different
    language and/or data structure.)

    The return value of parse_ese is a tuple.

    The second value in the tuple is the index of the character that
    caused the return. After a recursion this is used as the new index.
    When there is no more text left to parse, in this implementation it
    will instead refer to the first 'out of bounds' character, however in
    practice the value is unlikely to be used at this point.

    The first value in the tuple is the result of the evaluation of the
    text between the index supplied as argument, and the index returned.
    When the top level function returns, it will be the full version of
    the text after parsing.

    Note that a close character always causes a return, so an unmatched
    close character at the top level ends parsing at that position.

    Raises ValueError if 'mode' is not one of the three known states and a
    character has to be dispatched on it.
    """
    state = mode
    # Identity test on purpose: an empty dictionary handed down by the
    # enclosing tag must be kept, not replaced.
    if parent is None:
        parent = {}
    data = {
        "name": "",
        "payload": "",
        "content": "",
        "tags": {},
    }

    while index < len(original):
        char = original[index]
        if char == OPEN_CHAR:
            # Nested tag: its evaluated text is appended to whichever
            # element is currently being accumulated.
            result, index = parse_ese(
                original, index + 1, "name", data["tags"])
            data[state] += result
        elif char == CLOSE_CHAR:
            parent[data["name"]] = data
            return process_tag(data), index
        elif state == "content":
            # Must come before the bar check: a bar inside content is
            # ordinary text.
            data[state] += char
        elif char == BAR_CHAR:
            state = "content"
        elif state == "name":
            # The first space ends the name and starts the payload.
            if char == " ":
                state = "payload"
            else:
                data[state] += char
        elif state == "payload":
            data[state] += char
        else:
            raise ValueError(
                "No cases matched in parse_ese. (Invalid state?)")
        # Step past the character just handled; after a recursion that is
        # the close character of the nested tag.
        index += 1

    parent[data["name"]] = data
    return process_tag(data), index


def main():
    """
    Quick 'n dirty main that reads stdin, parses it and spits out the
    result.
    """
    # Parenthesised single-argument form works as a Python 2 print
    # statement and as a Python 3 function call.
    print(parse_ese(sys.stdin.read())[0])


if __name__ == '__main__':
    main()