definition lists as dictionaries
this notebook performs lexical analysis and string rendering of markdown definition lists to python dictionaries. this work coupled with 2024-04-03-markdown-lists-to-python.ipynb establishes markdown as a full data language with lists, dictionaries, strings, numbers, and none. i never wanted to be here, but here we are.
import re
from dataclasses import dataclass

import markdown_it
import midgy
# Leading run of list / definition markers: ``-``, ``+``, ``*``, ``:`` or a
# number with an optional ``.`` / ``)`` delimiter, each preceded by up to three
# and followed by up to four whitespace characters.  The pattern is anchored
# with ``^`` and compiled without MULTILINE, so it can only match at the very
# start of the text.  Because the delimiter is optional, a bare leading number
# is stripped as well.
_LEADING_MARKERS = re.compile(r"^(\s{,3}([\-\+\*\:]|[0-9]+[.\)]?)\s{,4})*")


def escape(self, body):
    """Strip leading list/definition markers from *body*, then defer to the parent ``escape``.

    Args:
        body: text of a block; only markers at the very start are removed.

    Returns:
        Whatever the parent class's ``escape`` returns for the stripped text.
    """
    body = _LEADING_MARKERS.sub("", body)
    # This function lives at module level and is attached to the class
    # afterwards, so zero-argument ``super()`` is unavailable here.
    # NOTE(review): resolving through ``type(self)`` recurses forever if the
    # class that carries this method is ever subclassed.
    return super(type(self), self).escape(body)
def update_env(self, token, env):
    """Let the parent class update *env* for *token*, then clear the ``hanging`` flag.

    Args:
        token: the token being processed; passed through to the parent untouched.
        env: mutable rendering environment; ``env["hanging"]`` is set to
            ``False`` after the parent has run.
    """
    # Two-argument super: this function is defined outside the class body.
    inherited = super(type(self), self)
    inherited.update_env(token, env)
    env.update(hanging=False)
we have to introduce the "hanging" and "comment" context keys to accommodate lists.
# NOTE(review): the block nesting below was reconstructed from a copy of this
# cell that had lost its indentation; confirm the block boundaries (especially
# in dd_open, dd_close, dt_open and postlex) against the notebook.
@dataclass
class Defs(midgy.language.python.Python):
    """Python renderer that emits dictionary source for markdown definition lists.

    The ``*_open`` / ``*_close`` handlers yield fragments of Python source for
    the matching definition-list tokens: ``({`` before the first term and
    ``})`` after the last definition of a list, ``:`` after each term, and
    ``[`` ... ``]`` around the definitions of a term that has more than one.
    ``postlex`` runs first and stores the flags those handlers read in
    ``token.meta``.

    Environment keys written by this class: ``comment``, ``hanging`` and
    ``continued``.
    """

    # reuse the module-level functions of the same name as methods
    escape, update_env = escape, update_env

    def dl_open(self, token, env):
        """Begin a definition list.

        Yields the separator that precedes the list: ``"|"`` when
        ``token.meta["prior"]`` points at a closed list with the same parent,
        otherwise ``get_indent(env)`` spaces.
        """
        parent = token.meta.get("parent")
        # stay in comment mode if already in it, or enter it for a nested list
        env["comment"] = bool(env.get("comment") or parent)
        # a top-level list first emits the non-code source that precedes it
        if not parent: yield from self.noncode_block(env, token.map[0]-1)
        # presumably "|" merges adjacent dictionaries with the dict union operator
        if (prior := token.meta.get("prior")) and prior.meta.get("open").meta.get("parent") is parent: yield "|"
        else: yield " " * self.get_indent(env)

    def dl_close(self, token, env):
        """Set ``env["comment"]`` to whether the list being closed has a parent list.

        NOTE(review): there is no ``yield`` here, so unlike the other handlers
        this returns ``None`` rather than an iterator; confirm the caller
        accepts that.
        """
        env["comment"] = bool(token.meta["open"].meta.get("parent"))

    def dd_open(self, token, env):
        """Begin a definition; opens ``[`` for the first of several definitions."""
        if token.meta.get("first_item") and not env.get("comment"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "["
            env["continued"] = False
        env.update(comment=False, hanging=True)

    def dd_close(self, token, env):
        """End a definition, yielding its text and the closing punctuation.

        In comment mode the punctuation is yielded before the remaining source
        is emitted through ``noncode_block``; otherwise it is yielded after.
        """
        # "last_item" is only present on definitions that belong to a
        # multi-definition group (postlex stores None or True there)
        is_item = "last_item" in token.meta
        last, last_item = token.meta.get("last"), token.meta.get("last_item")
        if (comment := env.get("comment")):
            if is_item:
                # "]" closes the group on its final definition, "," separates the others
                yield last_item and "]" or ","
            if last:
                yield "})"
            elif not last_item:
                yield ","
        # NOTE: `open` shadows the builtin and is not read again in this method
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1], whitespace=False, comment=comment)
        if not comment:
            if last_item:
                yield "]"
            if last:
                yield "})"
            else:
                yield ","
        # NOTE(review): postlex in this class only assigns meta["parent"] on
        # dl_open tokens; confirm this lookup on a dd_close token is ever truthy.
        env["comment"] = bool(last and token.meta.get("parent"))
        env.update(hanging=False)

    def dt_open(self, token, env):
        """Begin a term; the first term of a list also opens the dictionary with ``({``."""
        if token.meta.get("first"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "({"
            env["continued"] = False
        env.update(comment=False, hanging=True)

    def dt_close(self, token, env):
        """End a term: emit its source through ``noncode_block`` and yield ``:``."""
        # NOTE: `last` is assigned but never read; the lookup is repeated below
        last = token.meta.get("last")
        # NOTE: `open` shadows the builtin; it is the matching dt_open token
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1]+1, whitespace=False, comment=env.get("comment"))
        yield ":"
        # NOTE(review): postlex in this class only assigns meta["parent"] on
        # dl_open tokens; confirm this branch is reachable for a dt_open token.
        if open.meta.get("parent"): env["comment"] = bool(token.meta.get("last"))
        env.update(hanging=False)

    def postlex(self, tokens, env):
        """Annotate definition-list tokens with the metadata the handlers rely on.

        Tokens are scanned in reverse, so every closing token is seen before
        its opening token.  The scan stores in ``token.meta``:

        * ``open`` on ``dl_close`` / ``dd_close`` / ``dt_close``: the matching
          opening token;
        * ``parent`` on ``dl_open``: the ``dl_close`` of the enclosing list;
        * ``prior`` on ``dl_open``: a ``dl_close`` at the same level as the
          most recently processed list;
        * ``first`` on the ``dt_open`` of a list's first term and ``last`` on
          the list's final closing token;
        * ``first_item`` / ``last_item`` on the first ``dd_open`` and the
          ``dd_close`` tokens of a term with more than one definition;
        * ``end_code`` on a ``dd_close`` / ``dl_close`` pair that directly
          follows a ``code_block``.

        A ``code_block`` directly followed by ``dd_close`` then ``dl_close`` is
        moved to just after that ``dl_close`` and the scan is re-run on the
        reordered tokens.  Once nothing needs moving, the parent ``postlex``
        runs.  ``tokens`` is modified in place.
        """
        # parents: stack of (dl_close token, closing tokens collected for that list)
        # cleared: dl_open tokens of lists that have been fully processed
        # swaps:   1-based reverse positions of code blocks that must move
        parents, cleared, swaps = [], [], []
        # (previous token in the reverse scan, the one before it)
        prior = None, None
        for i, token in enumerate(tokens[::-1], 1):
            if token.type == "code_block":
                if prior[1] is not None:
                    # in document order: code_block, dd_close, dl_close
                    if prior[0].type == "dd_close" and prior[1].type == "dl_close":
                        prior[1].meta["end_code"] = prior[0].meta["end_code"] = token
                        swaps.append(i)
            prior = token, prior[0]
            match token:
                case markdown_it.token.Token(type="dl_close"):
                    parents.append((token, []))
                    if cleared:
                        # `parent` is bound here but not read again
                        if (parent := cleared[-1]) is not parents[0][0]:
                            # `close` is whatever the most recently processed
                            # dl_open case bound
                            if token.level == close.level:
                                cleared[-1].meta["prior"] = token
                case markdown_it.token.Token(type="dl_open"):
                    close, old = parents.pop()
                    if close.meta.get("end_code"):
                        # end the list's line range where the trailing code block starts
                        token.map[1] = close.meta.get("end_code").map[0]
                    close.meta["open"] = token
                    if parents: token.meta["parent"] = parents[-1][0]
                    # `old` is in reverse document order: the definitions of a
                    # term are seen before the term itself
                    dt, dd = None, []
                    for t in old:
                        match t:
                            case markdown_it.token.Token(type="dd_close"):
                                dd.append(t)
                            case markdown_it.token.Token(type="dt_close"):
                                dt = t
                                if len(dd) > 1:
                                    # several definitions for one term: mark every
                                    # one as a list item, then flag first and last
                                    [d.meta.setdefault("last_item", None) for d in dd]
                                    dd[-1].meta["open"].meta.setdefault("first_item", True)
                                    dd[0].meta["last_item"] = True
                                dd.clear()
                    # after the loop `dt` is the first term in document order
                    # NOTE(review): placement after the loop is inferred; this
                    # raises AttributeError if the list held no dt_close.
                    dt.meta["open"].meta["first"] = True
                    # old[0] is the list's final closing token in document order
                    if old: old[0].meta["last"] = True
                    while cleared and cleared[-1].level <= token.level:
                        cleared.pop()
                    if parents and close is not parents[0][0]:
                        cleared.append(token)
                case markdown_it.token.Token(type="dd_close" | "dt_close"):
                    parents[-1][1].append(token)
                case markdown_it.token.Token(type="dd_open" | "dt_open"):
                    if parents:
                        # the most recently collected closing token is this
                        # opening token's partner
                        if parents[-1][1][-1].meta.get("end_code"):
                            token.map[1] = parents[-1][0].meta.get("end_code").map[0] -1
                        parents[-1][1][-1].meta["open"] = token
        if swaps:
            for swap in swaps:
                # convert the 1-based reverse position into a forward index
                pos = len(tokens) - swap
                code = tokens.pop(pos)
                # after the pop, dd_close sits at pos and dl_close at pos + 1
                tokens.insert(pos + 2, code)
            # re-annotate now that the code blocks follow their lists
            self.postlex(tokens, env)
        else:
            # NOTE(review): super(type(self), self) recurses forever if Defs is
            # ever subclassed.
            super(type(self), self).postlex(tokens, env)
# Use the definition-list aware renderer for the cells that follow.
# NOTE(review): `shell` is not defined in this file; presumably it is supplied
# by the interactive notebook environment — confirm.
shell.tangle.parser = Defs()
%% -s
mount up
: this is the start of the popular song regulators by warren g and nate dogg
clear black night
: clear white moon
warren g
: on the streets
: trying to consume
is a timeless classic still getting airplay
%% -s
an actual useful application of definition list dictionaries is for writing schema.
this approach means that markdown is the literate programming language for generating
the form and function of a schema.
the markdown renderer we are using at the moment doesn't present definition lists, but we'll add that and revisit the post.
schema=\
title
: the title of a schema
properties
: start of the properties of the schema
length
: type
: integer
: string
description
: the length of the object
default
: ```python
1
```
end of the properties of the schema
schema