turning markdown lists into python objects

midgy allows python to be a potential target for markdown source. currently, all non-code elements are treated as block strings. for a long time, i have had a vision that markdown could create more objects, like lists and dictionaries. if we can reach these targets, then markdown can exist as a general purpose programming interface for many languages.

    import midgy, markdown_it
    def escape(self, body):
        """Strip leading list markers from ``body``, then defer to the parent ``escape``.

        The pattern removes, from the very start of ``body`` only, any run of
        list prefixes: up to three whitespace characters, a marker (one of
        ``-``, ``+``, ``*``, ``:`` or digits optionally followed by ``.`` or
        ``)``), and up to four trailing whitespace characters.

        This function is written at module level and attached to a class
        afterwards, so zero-argument ``super()`` is not available here.
        NOTE(review): ``super(type(self), self)`` resolves relative to the
        *instance's* class, so subclassing the class this is attached to would
        make the call recurse into itself.
        """
        # raw string: the pattern is full of backslashes that are invalid escape
        # sequences in a normal string literal (a warning on modern python).
        # NOTE(review): the delimiter after digits is optional, so a body that
        # starts with a bare number (e.g. "12 apples") also loses that number.
        body = re.sub(r"^(\s{,3}([\-\+\*\:]|[0-9]+[.\)]?)\s{,4})*", "", body)
        return super(type(self), self).escape(body)

    def update_env(self, token, env):
        """Run the parent ``update_env`` and then reset the ``hanging`` context key to False."""
        parent = super(type(self), self)
        parent.update_env(token, env)
        # the parent runs first, so ``hanging`` always ends up False afterwards.
        env.update(hanging=False)

we have to introduce the "hanging" and "comment" context keys to accommodate lists.

    @dataclass
    class Lists(midgy.language.python.Python):
        list_items: str = "*-."
        escape, update_env =escape, update_env
        def bullet_list_open(self, token, env):
            env["comment"] = env.get("comment") or token.markup not in self.list_items
            if token.markup not in self.list_items: return
            if not (parent := token.meta.get("parent")): yield from self.noncode_block(env, token.map[0]-1)
            if (prior := token.meta.get("prior")) and (open := prior.meta.get("open")).meta.get("parent") is parent: yield "+"
            else: yield " " * self.get_indent(env) 

        def bullet_list_close(self, token, env):
            if token.markup not in self.list_items: return
            if not token.meta["open"].meta.get("parent"): env["comment"] = False

        def list_item_open(self, token, env):
            if token.markup not in self.list_items: return
            if token.meta.get("first"):
                yield from self.noncode_block(env, token.map[0]-1, comment=True)
                yield "(["
                env["continued"] = False
            env.update(comment=False, hanging=True)

        def list_item_close(self, token, env): 
            if token.markup not in self.list_items: return
            last = token.meta.get("last")
            if (comment := env.get("comment")): yield last and "])" or ","
            yield from self.noncode_block(env, (open := token.meta.get("open")).map[1], whitespace=False, comment=comment)
            if not comment: yield last and "])" or ","
            if open.meta.get("parent"): env["comment"] = bool(token.meta.get("last"))
            env.update(hanging=False)

        ordered_list_open, ordered_list_close = bullet_list_open, bullet_list_close

        def postlex(self, tokens, env):
            parents, cleared, swaps = [], [], []
            prior = None, None
            for i, token in enumerate(tokens[::-1], 1):
                if token.type == "code_block":
                    if prior[1] is not None:
                        if prior[0].type == "list_item_close" and prior[1].type == "bullet_list_close":
                            prior[1].meta["end_code"] = prior[0].meta["end_code"] = token
                            swaps.append(i)
                if token.markup not in self.list_items: continue
                prior = token, prior[0]
                match token:
                    case markdown_it.token.Token(type="bullet_list_close" | "ordered_list_close"):
                        parents.append((token, []))
                        if cleared:
                            if (parent := cleared[-1].meta.get("parent")) is not parents[0][0]:
                                if token.level == close.level:
                                    cleared[-1].meta["prior"] = token
                    case markdown_it.token.Token(type="bullet_list_open" | "ordered_list_open"):
                        close, old = parents.pop()
                        if close.meta.get("end_code"):
                            token.map[1] = close.meta.get("end_code").map[0]
                        close.meta["open"] = token
                        if parents: token.meta["parent"] = parents[-1][0]
                        if old and close.markup in self.list_items:
                            old = [x for x in old if x.markup in self.list_items]
                            old[-1].meta["open"].meta["first"] = old[0].meta["last"] = True
                        while cleared and cleared[-1].level &lt;= token.level:
                            cleared.pop()
                        if parents and close is not parents[0][0]:
                            cleared.append(token)
                    case markdown_it.token.Token(type="list_item_close"):
                        parents[-1][1].append(token)
                    case markdown_it.token.Token(type="list_item_open"):
                        if parents:
                            if parents[-1][1][-1].meta.get("end_code"):
                                 token.map[1] = parents[-1][0].meta.get("end_code").map[0] -1
                            parents[-1][1][-1].meta["open"] = token
            if swaps:
                for swap in swaps:
                    pos = len(tokens) - swap
                    code = tokens.pop(pos)
                    tokens.insert(pos + 2, code)
                self.postlex(tokens, env)

            else:
                super(type(self), self).postlex(tokens, env)

    # replace the parser on `shell.tangle` with a `Lists` instance
    # (presumably so the cells below are tangled with list support -- confirm).
    shell.tangle.parser = Lists()

%% -s
        (
- 
    - something

        ```python
        .upper()
        ```


            )

- something
```
.upper()
```
```
)
```

(
([([("""something""")

#         ```python
.upper()
#         ```
])])

    )

[['SOMETHING']]

%% -s
        (testing := 
whatevetr the fuck 

                    .splitlines()+

* asdfdf
* asdfadf

    - asdfasdfa
    - + asdfadsf
      + nothing

      booop 

      1. asdfadsf
      2. sadfasdfasdfvfer

      asdfasdfadf
      1. asdf89723542
      3) 082135fdva


                      + [1,2]

  saDFASDF

* asedrfadsv\



        )

    (testing :=

whatevetr the fuck

                .splitlines()+

asdfdf
asdfadf
- asdfasdfa
- - asdfadsf
- nothing
booop
1. asdfadsf
2. sadfasdfasdfvfer
asdfasdfadf 1. asdf89723542 3) 082135fdva
```
              + [1,2]
```

saDFASDF

asedrfadsv\
```
)
```

(testing := 
("""whatevetr the fuck""")

            .splitlines()+
            ([
("""asdfdf"""),            
#  * asdfadf
([
("""asdfasdfa"""),            
#      - + asdfadsf
#        + nothing

#        booop 
([
("""asdfadsf"""),
("""sadfasdfasdfvfer""")])+

([("""asdfasdfadf
1. asdf89723542""")])])
#        3) 082135fdva


              + [1,2]
,
#    saDFASDF

("""asedrfadsv""")])              \
              \
              \
              \
)

['whatevetr the fuck',
 'asdfdf',
 ['asdfasdfa',
  ['asdfadsf', 'sadfasdfasdfvfer', 'asdfasdfadf\n1. asdf89723542'],
  1,
  2],
 'asedrfadsv']

%% 
{{files.head(2).style.to_html()}}



<section hidden="">

        del files
        (gists := pandas.DataFrame(
* * https://api.github.com/users/tonyfast/gists?page=1
  * https://api.github.com/users/tonyfast/gists?page=2

* * https://api.github.com/users/tonyfast/gists?page=3
  * https://api.github.com/users/tonyfast/gists?page=4

        ).stack())
        gists = await gists.http.get()
        gists = gists.explode().series().set_index("id")
        files = gists.files.apply(dict.values).apply(list).explode().series()


</section>

	filename	type	language	raw_url	size
id
90c41d4994f75c594db804aeba56fc26	first_and_second_laws_of_thermodynamics.ipynb	text/plain	Jupyter Notebook	https://gist.githubusercontent.com/tonyfast/90c41d4994f75c594db804aeba56fc26/raw/05a74d0f32aa9dfcd507f2acaf1875f330506fb3/first_and_second_laws_of_thermodynamics.ipynb	20469
aa3b16c5a284150e3d727a843b6cefec	axe_types.py	application/x-python	Python	https://gist.githubusercontent.com/tonyfast/aa3b16c5a284150e3d727a843b6cefec/raw/2f5b9e13263abcfa1c2fee71416a0f94971c48c6/axe_types.py	8442

%% -s
        gists = pandas.DataFrame()
* * https://api.github.com/users/tonyfast/gists?page=1
  * https://api.github.com/users/tonyfast/gists?page=2
* * https://api.github.com/users/tonyfast/gists?page=3
  * https://api.github.com/users/tonyfast/gists?page=4

        .stack()

%% -s
        df = pandas.DataFrame\
* * https://api.github.com/users/tonyfast/gists?page=1
  * https://api.github.com/users/tonyfast/gists?page=2
* * https://api.github.com/users/tonyfast/gists?page=3
  * https://api.github.com/users/tonyfast/gists?page=4

%%
{{gists.head(2).style.to_html()}}

%% -sn simple_lists


* a normal
* this prelude is commented out



  ...


---
weird
---

                    x=\
stinky

                    print(code := Lists().render(simple_lists))
                    ast.parse(code)

%%

    # render an ordered list that contains a nested `-` list and is followed by
    # a `+` list, print the generated source, then check it parses as python.
    print(py := Lists().render("""
      1. asdf
      2. asfd

         - asdfa
         - asdfasdf

      + WHAT
    """));
    ast.parse(py)

%%

    # render input that mixes `:`-prefixed lines with a nested `-` list, print
    # the generated source, then check it parses as python.
    print(py := Lists().render("""a
    : asdfadf
    `: x
        : - 1
          - 2
        : stuff
          : but

        : what

    complete """));
    ast.parse(py)

%%


    # render nested `*` lists with interleaved paragraphs and blank lines, print
    # the generated source, then check it parses as python.
    print(py := Lists().render("""* a
    *   * b

        anything will



        * 
           c


        asdfasdf

      the chicanery       

        """));
    ast.parse(py)

+[]

%% -t

* a
*   * b

    anything will

    * c

    class Defs(midgy.language.python.Python):
        """Experimental renderer for markdown definition lists.

        Terms (``dt``) and definitions (``dd``) are emitted through
        ``noncode_string`` with the punctuation of a python dict literal
        prepended/appended: ``{`` before a term, ``:`` or ``:[`` before its first
        definition, ``,`` between definitions and ``}`` or ``]}`` after the last.

        The handlers read ``prior``, ``next`` and ``next_code`` from
        ``token.meta`` and ``last`` from ``env``; none of these are set in this
        class (presumably they are provided by the parent renderer -- confirm).
        """

        def dl_open(self, token, env):
            """Flush preceding text, push the list on ``env["containers"]`` and emit ``(``."""
            yield from self.noncode_block(env, token)
            env.setdefault("containers", []).append(token)
            indent = self.get_indent(env)
            yield " "*indent
            yield "("

        def dl_close(self, token, env):
            """Pop the list from ``env["containers"]`` and emit the current indent.

            NOTE(review): no ``)`` is emitted here to balance the ``(`` from
            ``dl_open`` -- confirm whether that is intentional.
            """
            env["containers"].pop()
            indent = self.get_indent(env)
            yield " "*indent

        def dt_open(self, token, env):
            """Record the term on ``env["items"]`` and emit its text prefixed with ``{``."""
            env.setdefault("items", []).append(token)
            block = self.generate_block_lines(env, token.map[1]+1)
            if self.noncode_blocks:
                yield from self.noncode_string(block, True, env, False, prepend="{")
                self.update_env(token, env, indented=False, quoted=False, continued=False)

        def dd_open(self, token, env):
            """Emit a definition with the punctuation implied by its neighbours.

            ``prior`` and ``next`` (from ``token.meta``) decide the text placed
            before and after the definition:

            * after a term: ``:`` ... ``}`` when a term follows, ``:[`` ... ``,``
              when another definition follows, ``:`` when code follows;
            * after a definition: ``,`` when another definition follows, ``]}``
              when a term follows or nothing follows;
            * with no ``prior``: ``:``, plus ``}`` when nothing follows.
            """
            env.setdefault("items", []).append(token)
            token.meta.update(last=env["last"])
            prior, next_d = token.meta.get("prior"), token.meta.get("next")
            next_code = token.meta.get("next_code")
            if next_code and next_code.map[0] < token.map[1]:
                # a code token starts inside this definition: stop the text just
                # before it and treat the code as the next element.
                next_d = next_code
                next_code_line = next_code.map[0]-1
            else:
                next_code_line = token.map[1]
            block = self.generate_block_lines(env, next_code_line)
            line = next(block)
            prepend=append=""
            # NOTE(review): looks like a leftover debugging print -- it writes the
            # next token to stdout on every definition; confirm before removing.
            print(next_d)
            if prior and prior.type == "dt_open":
                if next_d and next_d.type == "dt_open":
                    prepend = ":"
                    append = "}"
                if next_d and next_d.type == "dd_open":
                    prepend = ":["
                    append = ","
                if self.is_code_block(next_d):
                    prepend = ":"
            if prior and prior.type == "dd_open":
                if next_d and next_d.type == "dd_open":
                    append = ","
                if next_d and next_d.type == "dt_open":
                    append = "]}"
                if not next_d:
                    append = "]}"
            if not prior:
                prepend = ":"
                if not next_d:
                    append = "}"
            # drop the leading whitespace and the ":" marker from the first line.
            yield from self.noncode_string([line.lstrip().lstrip(":"), *block], True, env, False, prepend=prepend, append=append)

    print(Defs().render(


"""a
: shit show

b
: c



antasdfad
: asdfadf


            |{"what" : 2}|
    more stuff gets weird now

another
: bang
: c
"""


    ))

    pprint.pprint(Defs().parse(

"""a
: shit show

b
: c

--- 


antasdfad
: asdfadf


                    print


another
: bang
: c
"""


    ))

%% -t
    x=\
test

a
: b