Skip to content

definition lists as dictionaries

this notebook performs lexical analysis and string rendering of markdown definition lists to python dictionaries. this work coupled with 2024-04-03-markdown-lists-to-python.ipynb establishes markdown as a full data language with lists, dictionaries, strings, numbers, and none. i never wanted to be here, but here we are.

import midgy, markdown_it
def escape(self, body):
    """Strip leading list markers from ``body``, then defer to the parent ``escape``.

    The pattern is anchored at the start of the string and removes any run of
    marker groups: up to three whitespace characters, then one of ``-``, ``+``,
    ``*``, ``:`` or a number with an optional ``.``/``)``, then up to four
    whitespace characters.

    Args:
        body: the text to strip and escape.

    Returns:
        Whatever the parent class's ``escape`` returns for the stripped text.
    """
    # local import keeps the function self-contained; the file's visible imports do not include ``re``
    import re

    # raw string: ``\s``, ``\-`` ... are regex escapes, not (invalid) string escapes
    markers = re.compile(r"^(\s{,3}([\-\+\*\:]|[0-9]+[.\)]?)\s{,4})*")
    stripped = markers.sub("", body)
    # NOTE(review): this function lives outside a class body, so zero-argument
    # ``super()`` is unavailable; ``super(type(self), self)`` only resolves
    # correctly while the class it is attached to is not subclassed.
    return super(type(self), self).escape(stripped)

def update_env(self, token, env):
    """Run the parent ``update_env`` and then reset the ``hanging`` context key.

    Args:
        token: the token forwarded unchanged to the parent implementation.
        env: the mutable environment mapping; ``env["hanging"]`` is set to ``False``.
    """
    parent = super(type(self), self)
    parent.update_env(token, env)
    # every token starts out as "not hanging"; the list handlers flip this on
    env["hanging"] = False

we have to introduce the "hanging" and "comment" context keys to accommodate lists.

@dataclass
class Defs(midgy.language.python.Python):
    escape, update_env = escape, update_env
    def dl_open(self, token, env):
        parent = token.meta.get("parent")
        env["comment"] = bool(env.get("comment") or parent)
        if not parent: yield from self.noncode_block(env, token.map[0]-1)
        if (prior := token.meta.get("prior")) and prior.meta.get("open").meta.get("parent") is parent: yield "|"
        else: yield " " * self.get_indent(env) 

    def dl_close(self, token, env):
        env["comment"] = bool(token.meta["open"].meta.get("parent"))

    def dd_open(self, token, env):
        if token.meta.get("first_item") and not env.get("comment"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "["
            env["continued"] = False
            env.update(comment=False, hanging=True)

    def dd_close(self, token, env): 
        is_item = "last_item" in token.meta
        last, last_item = token.meta.get("last"), token.meta.get("last_item")

        if (comment := env.get("comment")): 
            if is_item:
                yield last_item and "]" or ","
            if last:
                yield "})"
            elif not last_item:
                yield ","
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1], whitespace=False, comment=comment)
        if not comment: 
            if last_item:
                yield "]"
            if last:
                yield "})"
            else:
                yield ","
        env["comment"] = bool(last and token.meta.get("parent")) 
        env.update(hanging=False)


    def dt_open(self, token, env):
        if token.meta.get("first"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "({"
            env["continued"] = False
        env.update(comment=False, hanging=True)

    def dt_close(self, token, env): 
        last = token.meta.get("last")
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1]+1, whitespace=False, comment=env.get("comment"))
        yield ":"
        if open.meta.get("parent"): env["comment"] = bool(token.meta.get("last"))
        env.update(hanging=False)

    def postlex(self, tokens, env):
        parents, cleared, swaps = [], [], []
        prior = None, None
        for i, token in enumerate(tokens[::-1], 1):
            if token.type == "code_block":
                if prior[1] is not None:
                    if prior[0].type == "dd_close" and prior[1].type == "dl_close":
                        prior[1].meta["end_code"] = prior[0].meta["end_code"] = token
                        swaps.append(i)
            prior = token, prior[0]
            match token:
                case markdown_it.token.Token(type="dl_close"):
                    parents.append((token, []))
                    if cleared:
                        if (parent := cleared[-1]) is not parents[0][0]:
                            if token.level == close.level:
                                cleared[-1].meta["prior"] = token
                case markdown_it.token.Token(type="dl_open"):
                    close, old = parents.pop()
                    if close.meta.get("end_code"):
                        token.map[1] = close.meta.get("end_code").map[0]
                    close.meta["open"] = token
                    if parents: token.meta["parent"] = parents[-1][0]
                    dt, dd = None, []
                    for t in old:
                        match t:
                            case markdown_it.token.Token(type="dd_close"):
                                dd.append(t)
                            case markdown_it.token.Token(type="dt_close"):
                                dt = t
                                if len(dd) > 1:
                                    [d.meta.setdefault("last_item", None) for d in dd]
                                    dd[-1].meta["open"].meta.setdefault("first_item", True)
                                    dd[0].meta["last_item"] = True
                                dd.clear()
                    dt.meta["open"].meta["first"] = True
                    if old: old[0].meta["last"] = True
                    while cleared and cleared[-1].level <= token.level:
                        cleared.pop()
                    if parents and close is not parents[0][0]:
                        cleared.append(token)
                case markdown_it.token.Token(type="dd_close" | "dt_close"):
                    parents[-1][1].append(token)
                case markdown_it.token.Token(type="dd_open" | "dt_open"):
                    if parents:
                        if parents[-1][1][-1].meta.get("end_code"):
                             token.map[1] = parents[-1][0].meta.get("end_code").map[0] -1
                        parents[-1][1][-1].meta["open"] = token
        if swaps:
            for swap in swaps:
                pos = len(tokens) - swap
                code = tokens.pop(pos)
                tokens.insert(pos + 2, code)
            self.postlex(tokens, env)

        else:
            super(type(self), self).postlex(tokens, env)

# install an instance of Defs as the tangle parser on `shell` (defined outside this excerpt)
shell.tangle.parser = Defs()
%% -s
mount up
: this is the start of the popular song regulators by warren g and nate dogg

  clear black night
  : clear white moon

  warren g
  : on the streets
  : trying to consume

  is a timeless classic still getting airplay
mount up

this is the start of the popular song regulators by warren g and nate dogg

clear black night
clear white moon
warren g
on the streets
trying to consume

is a timeless classic still getting airplay

({
("""mount up"""):
#  : this is the start of the popular song regulators by warren g and nate dogg
({
("""clear black night"""):
("""clear white moon"""),

("""warren g"""):
[("""on the streets"""),
("""trying to consume""")]})})

#    is a timeless classic still getting airplay

{'mount up': {'clear black night': 'clear white moon',
  'warren g': ['on the streets', 'trying to consume']}}
%% -s
an actual useful application of definition list dictionaries is for writing schema.
this approach means that markdown is the literate programming language for generating
the form and function of a schema.

the markdown rendered we are using at the moment doesnt present definition lists, but we'll add that revisit the post.

            schema=\
title
: the title of a schema

properties
: start of the properties of the schema

  length
  : type
    : integer
    : string

    description
    : the length of the object

    default
    :  ```python
            1
       ```

  end of the properties of the schema

            schema

an actual useful application of definition list dictionaries is for writing schema. this approach means that markdown is the literate programming language for generating the form and function of a schema.

the markdown rendered we are using at the moment doesnt present definition lists, but we'll add that revisit the post.

        schema=\
title

the title of a schema

properties

start of the properties of the schema

length
type
integer
string
description
the length of the object
default
     1

end of the properties of the schema

      schema
("""an actual useful application of definition list dictionaries is for writing schema.
this approach means that markdown is the literate programming language for generating
the form and function of a schema.

the markdown rendered we are using at the moment doesnt present definition lists, but we'll add that revisit the 
post.""")

schema=\
({("""title"""):
("""the title of a schema"""),

("""properties"""):
#  : start of the properties of the schema
({
("""length"""):
({("""type"""):
[("""integer"""),
("""string""")],

("""description"""):
("""the length of the object"""),

("""default"""):
#     :  ```python
1
#        ```
})})
})#    end of the properties of the schema

schema

{'title': 'the title of a schema',
 'properties': {'length': {'type': ['integer', 'string'],
   'description': 'the length of the object',
   'default': 1}}}