# ese.py

#!/usr/bin/python

import sys

# Delimiter characters recognised by parse_ese. They may be redefined,
# within sane limits.
OPEN_CHAR = "{"   # starts a tag; the parser recurses and begins reading a name
CLOSE_CHAR = "}"  # ends the tag currently being parsed
BAR_CHAR = "|"    # inside a tag, switches from name/payload to content

def null_op(data):
  """
  Pass-through handler: hand back the tag's content untouched.

  Registered for tags with an empty name, and also used as the fallback
  for tag names that have no entry in the 'tags' dictionary.
  """
  content = data["content"]
  return content

def tag_image(data):
  """
  Example handler for the 'image' tag.

  Produces a placeholder string suggesting that an image be loaded from
  the URL given as the tag's content. Optional nested 'height' and
  'width' tags contribute their payload as attributes.

  As a side effect, the keys 'heighttext' and 'widthtext' are stored in
  the data dictionary (empty strings when the nested tag is absent).
  """
  nested = data["tags"]
  data["heighttext"] = (
    ' height="%s"' % nested["height"]["payload"] if "height" in nested else ""
  )
  data["widthtext"] = (
    ' width="%s"' % nested["width"]["payload"] if "width" in nested else ""
  )
  template = "<inline image%(heighttext)s%(widthtext)s here from %(content)s>"
  return template % data

def tag_exit(data):
  """
  Example handler for the 'exit' tag.

  Wraps the tag's content in a notional 'clicky' element whose action is
  "go <alternative>", representing what could happen when the text is
  clicked. Expects 'alternative' and 'content' keys in the data
  dictionary.
  """
  template = '<clicky action="go %(alternative)s">%(content)s</clicky>'
  return template % data

# Lookup table used by process_tag: each tag name maps either to a plain
# string that replaces the tag outright, or to a callable that is invoked
# with the tag's data dictionary. The "oc", "cc" and "bc" entries expand to
# the delimiter characters themselves.
tags = dict([
  ("", null_op),
  ("oc", OPEN_CHAR),
  ("cc", CLOSE_CHAR),
  ("bc", BAR_CHAR),
  ("image", tag_image),
  ("exit", tag_exit),
])

def process_tag(data):
  """
  Resolve a parsed tag to its replacement text.

  The tag name is looked up in the 'tags' dictionary. A callable entry is
  invoked with the data dictionary and its result returned; a
  non-callable entry (a string) is returned as is. Names without an
  entry fall back to null_op, which returns the content.

  Before the lookup, an 'alternative' member is added to the data
  dictionary: the payload when it is non-empty, otherwise the content.
  """
  data["alternative"] = data["payload"] or data["content"]

  handler = tags.get(data["name"], null_op)
  if not callable(handler):
    return handler
  return handler(data)

def parse_ese(original, index=0, mode="content", parent=None):
  """
  Parse Extremely Simplified Enamel text and return the evaluated result.

  The parser walks through the text one character at a time, recursing
  whenever an open character is encountered and returning when a close
  character is encountered or the end of the text is reached. Note that
  a close character ends the current call even at the top level, so
  text following an unmatched close character is not parsed.

  Arguments:
    original -- the full body of text. It is passed down unchanged on
                every recursion and never modified.
    index    -- position in 'original' at which to start parsing.
    mode     -- initial parser state: "name", "payload" or "content".
                The top level normally uses "content" (the default);
                recursive calls always start in "name".
    parent   -- dictionary in which this call registers its own data
                under the tag name (the enclosing tag's 'tags' member).
                A fresh dictionary is used when omitted.

  States:
    name    -- accumulating the tag name; a space switches to payload.
    payload -- accumulating additional data associated with the tag,
               such as a dbref for an exit.
    content -- accumulating the top level text or the content of a tag.
    A bar character switches from name or payload to content; while
    already in content it is ordinary text.

  The 'data' dictionary holds the name, payload and content strings plus
  a 'tags' dictionary of nested tags, and is handed to process_tag when
  this call finishes. process_tag adds a further element, 'alternative',
  which is not set up here. (This may be important to keep in mind when
  implementing a parser in a different language and/or data structure.)

  Returns a tuple (text, index). 'text' is the evaluation of everything
  between the supplied index and the returned one; for the top level
  call this is the full parsed text. 'index' is the position of the
  close character that ended this call, or, when the text ran out, the
  first out of bounds position (or the supplied index if that was
  already past the end).

  Raises ValueError when an ordinary character is met while the state is
  not one of the three known states.
  """
  state = mode
  if parent is None:
    parent = {}
  data = {
    "name": "",
    "payload": "",
    "content": "",
    "tags": {},
    }
  length = len(original)

  while index < length:
    char = original[index]

    if char == OPEN_CHAR:
      # Nested tag: its evaluated text is appended to whatever part of the
      # current tag (name, payload or content) is being accumulated.
      result, index = parse_ese(original, index + 1, "name", data["tags"])
      data[state] += result
      if index >= length:
        # The nested tag was never closed, so there is no close character
        # to step over. Stop here so the reported index stays at the first
        # out of bounds position instead of drifting further per level.
        break

    elif char == CLOSE_CHAR:
      # Leave index on the close character; the caller steps past it.
      break

    elif state == "content":
      data[state] += char

    elif char == BAR_CHAR:
      state = "content"

    elif state == "name":
      if char == " ":
        state = "payload"
      else:
        data[state] += char

    elif state == "payload":
      data[state] += char

    else:
      raise ValueError("No cases matched in parse_ese. (Invalid state?)")

    index += 1

  # Single exit path for both "close character" and "end of text": register
  # this tag with its parent so the parent's handler can inspect it, then
  # evaluate it.
  parent[data["name"]] = data
  return process_tag(data), index

def main():
  """
  Quick 'n dirty main: read all of stdin, parse it as ESE and print the
  resulting text to stdout.
  """
  parsed_text = parse_ese(sys.stdin.read())[0]
  # A single parenthesised argument prints identically under the Python 2
  # print statement and the Python 3 print function.
  print(parsed_text)

# Run the stdin-to-stdout filter only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
  main()