skip to main content

@tonyfast s notebooks

site navigation
notebook summary
title
definition lists as dictionaries
description
this notebook performs lexical analysis and string rendering of markdown definition lists to python dictionaries. this work coupled with 2024-04-03-markdown-lists-to-python.ipynb establishes markdown as a full data language with lists, dictionaries, strings, numbers, and none. i never wanted to be here, but here we are.
cells
8 total
6 code
state
executed in order
kernel
Python [conda env:p311] *
language
python
name
conda-env-p311-py
lines of code
150
outputs
6
table of contents
{"kernelspec": {"display_name": "Python [conda env:p311] *", "language": "python", "name": "conda-env-p311-py"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3"}, "widgets": {"application/vnd.jupyter.widget-state+json": {"state": {}, "version_major": 2, "version_minor": 0}}, "title": "definition lists as dictionaries", "description": "this notebook performs lexical analysis and string rendering of markdown definition lists to python dictionaries.\nthis work coupled with 2024-04-03-markdown-lists-to-python.ipynb establishes\nmarkdown as a full data language with lists, dictionaries, strings, numbers, and none.\ni never wanted to be here, but here we are."}
notebook toolbar
Activate
cell ordering
1

definition lists as dictionaries

this notebook performs lexical analysis and string rendering of markdown definition lists to python dictionaries. this work coupled with 2024-04-03-markdown-lists-to-python.ipynb establishes markdown as a full data language with lists, dictionaries, strings, numbers, and none. i never wanted to be here, but here we are.

2
import midgy, markdown_it
def escape(self, body):
    """Strip leading list/definition markers from ``body``, then escape it.

    This function is written to be attached to a class as a method; it is not
    defined inside a class body, which is why the parent implementation is
    reached through ``super(type(self), self)`` instead of zero-argument
    ``super()``.

    Only the very start of ``body`` is affected. Any run of markers is
    removed, where one marker is: up to three whitespace characters, then
    either one of ``- + * :`` or a run of digits optionally followed by
    ``.`` or ``)``, then up to four whitespace characters.

    Args:
        body: the text to clean and escape.

    Returns:
        Whatever the parent class's ``escape`` returns for the stripped text.
    """
    # Local import: the visible import line does not bring ``re`` into scope,
    # so this keeps the helper usable on its own.
    import re

    # Raw string: the same pattern written as a plain literal relies on
    # invalid escape sequences (backslash-s, backslash-dash, ...), which
    # Python warns about.
    stripped = re.sub(r"^(\s{,3}([\-\+\*\:]|[0-9]+[.\)]?)\s{,4})*", "", body)

    # NOTE(review): super(type(self), self) resolves back to this same
    # function when called on an instance of a *subclass* of the class it is
    # attached to, which would recurse without end. Kept because the owning
    # class is not known at definition time.
    return super(type(self), self).escape(stripped)

def update_env(self, token, env):
    """Run the parent class's ``update_env`` and then reset ``env["hanging"]``.

    Meant to be attached to a class as a method; the parent implementation is
    looked up relative to ``type(self)``.

    Args:
        token: the token being processed, forwarded untouched to the parent.
        env: the mutable environment mapping; its "hanging" key is set to
            ``False`` after the parent has updated it.
    """
    inherited = super(type(self), self)
    inherited.update_env(token, env)
    # Always cleared last, so it overrides anything the parent stored here.
    env["hanging"] = False
3

we have to introduce the "hanging" and "comment" context keys to accommodate lists.

4
@dataclass
class Defs(midgy.language.python.Python):
    escape, update_env = escape, update_env
    def dl_open(self, token, env):
        parent = token.meta.get("parent")
        env["comment"] = bool(env.get("comment") or parent)
        if not parent: yield from self.noncode_block(env, token.map[0]-1)
        if (prior := token.meta.get("prior")) and prior.meta.get("open").meta.get("parent") is parent: yield "|"
        else: yield " " * self.get_indent(env) 
            
    def dl_close(self, token, env):
        env["comment"] = bool(token.meta["open"].meta.get("parent"))
        
    def dd_open(self, token, env):
        if token.meta.get("first_item") and not env.get("comment"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "["
            env["continued"] = False
            env.update(comment=False, hanging=True)
        
    def dd_close(self, token, env): 
        is_item = "last_item" in token.meta
        last, last_item = token.meta.get("last"), token.meta.get("last_item")

        if (comment := env.get("comment")): 
            if is_item:
                yield last_item and "]" or ","
            if last:
                yield "})"
            elif not last_item:
                yield ","
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1], whitespace=False, comment=comment)
        if not comment: 
            if last_item:
                yield "]"
            if last:
                yield "})"
            else:
                yield ","
        env["comment"] = bool(last and token.meta.get("parent")) 
        env.update(hanging=False)


    def dt_open(self, token, env):
        if token.meta.get("first"):
            yield from self.noncode_block(env, token.map[0]-1, comment=True)
            yield "({"
            env["continued"] = False
        env.update(comment=False, hanging=True)
        
    def dt_close(self, token, env): 
        last = token.meta.get("last")
        yield from self.noncode_block(env, (open := token.meta.get("open")).map[1]+1, whitespace=False, comment=env.get("comment"))
        yield ":"
        if open.meta.get("parent"): env["comment"] = bool(token.meta.get("last"))
        env.update(hanging=False)
        
    def postlex(self, tokens, env):
        parents, cleared, swaps = [], [], []
        prior = None, None
        for i, token in enumerate(tokens[::-1], 1):
            if token.type == "code_block":
                if prior[1] is not None:
                    if prior[0].type == "dd_close" and prior[1].type == "dl_close":
                        prior[1].meta["end_code"] = prior[0].meta["end_code"] = token
                        swaps.append(i)
            prior = token, prior[0]
            match token:
                case markdown_it.token.Token(type="dl_close"):
                    parents.append((token, []))
                    if cleared:
                        if (parent := cleared[-1]) is not parents[0][0]:
                            if token.level == close.level:
                                cleared[-1].meta["prior"] = token
                case markdown_it.token.Token(type="dl_open"):
                    close, old = parents.pop()
                    if close.meta.get("end_code"):
                        token.map[1] = close.meta.get("end_code").map[0]
                    close.meta["open"] = token
                    if parents: token.meta["parent"] = parents[-1][0]
                    dt, dd = None, []
                    for t in old:
                        match t:
                            case markdown_it.token.Token(type="dd_close"):
                                dd.append(t)
                            case markdown_it.token.Token(type="dt_close"):
                                dt = t
                                if len(dd) > 1:
                                    [d.meta.setdefault("last_item", None) for d in dd]
                                    dd[-1].meta["open"].meta.setdefault("first_item", True)
                                    dd[0].meta["last_item"] = True
                                dd.clear()
                    dt.meta["open"].meta["first"] = True
                    if old: old[0].meta["last"] = True
                    while cleared and cleared[-1].level <= token.level:
                        cleared.pop()
                    if parents and close is not parents[0][0]:
                        cleared.append(token)
                case markdown_it.token.Token(type="dd_close" | "dt_close"):
                    parents[-1][1].append(token)
                case markdown_it.token.Token(type="dd_open" | "dt_open"):
                    if parents:
                        if parents[-1][1][-1].meta.get("end_code"):
                             token.map[1] = parents[-1][0].meta.get("end_code").map[0] -1
                        parents[-1][1][-1].meta["open"] = token
        if swaps:
            for swap in swaps:
                pos = len(tokens) - swap
                code = tokens.pop(pos)
                tokens.insert(pos + 2, code)
            self.postlex(tokens, env)
            
        else:
            super(type(self), self).postlex(tokens, env)

# Replace the tangle parser with a fresh Defs instance.
# NOTE(review): `shell` is not defined in the visible code - presumably the
# interactive shell object with midgy's tangle step attached; confirm.
shell.tangle.parser = Defs()
5 3 outputs.
mount up

this is the start of the popular song regulators by warren g and nate dogg

clear black night
clear white moon
warren g
on the streets
trying to consume

is a timeless classic still getting airplay

({
("""mount up"""):
#  : this is the start of the popular song regulators by warren g and nate dogg
({
("""clear black night"""):
("""clear white moon"""),

("""warren g"""):
[("""on the streets"""),
("""trying to consume""")]})})

#    is a timeless classic still getting airplay

{'mount up': {'clear black night': 'clear white moon',
  'warren g': ['on the streets', 'trying to consume']}}
6 3 outputs.

an actual useful application of definition list dictionaries is for writing schema. this approach means that markdown is the literate programming language for generating the form and function of a schema.

the markdown renderer we are using at the moment doesn't present definition lists, but we'll add that and revisit the post.

        schema=\
title

the title of a schema

properties

start of the properties of the schema

length
type
integer
string
description
the length of the object
default
     1

end of the properties of the schema

      schema
("""an actual useful application of definition list dictionaries is for writing schema.
this approach means that markdown is the literate programming language for generating
the form and function of a schema.

the markdown rendered we are using at the moment doesnt present definition lists, but we'll add that revisit the 
post.""")

schema=\
({("""title"""):
("""the title of a schema"""),

("""properties"""):
#  : start of the properties of the schema
({
("""length"""):
({("""type"""):
[("""integer"""),
("""string""")],

("""description"""):
("""the length of the object"""),

("""default"""):
#     :  ```python
1
#        ```
})})
})#    end of the properties of the schema

schema

{'title': 'the title of a schema',
 'properties': {'length': {'type': ['integer', 'string'],
   'description': 'the length of the object',
   'default': 1}}}
7

8