Skip to content

formatting markdown it tokens as python doctests

midgy is a tool i've been crafting that translates markdown to valid python. this concept might sound peculiar from a programming perspective, but it was designed as a [literate programming] tool.

to avoid feature creep, midgy tries to stick fairly close to the commonmark spec when tokenizing markdown. midgy adds a doctest token to the parser. we make this addition because doctest is a [literate programming] convention included in the core python language.

in this document, we convert the markdown_it tokens into valid doctest.DocTest runners.

    import midgy, doctest, unittest, typing
    from textwrap import dedent

write some sample doctests to parse

    # Fixture: the function has no body beyond its docstring, which holds two
    # doctests (one with plain expected output, one expecting an exception).
    # Its __doc__ is what gets parsed into tokens further down.
    def a_testable_function():
        """
        >>> range(1)
        range(0, 1)

        >>> assert False
        Traceback (most recent call last):
        ...
        AssertionError
        """

verify that these tests pass. doctest.testmod runs doctest on the __main__ module.

    # Run the module's doctests with ELLIPSIS matching enabled; the returned
    # TestResults is shown as the output below.
    doctest.testmod(optionflags=doctest.ELLIPSIS)
TestResults(failed=0, attempted=2)

make some markdown_it tokens from the doctests.

    # Build the parser, tokenize the fixture's docstring, then echo the tokens
    # as the trailing expression so they are displayed.
    parser = midgy.Python()
    tokens = parser.parse(a_testable_function.__doc__)
    tokens
[Token(type='code_block', tag='code', nesting=0, attrs={}, map=[1, 3], level=0, children=None, content='    >>> range(1)\n    range(0, 1)\n', markup='', info='', meta={'first_indent': 4, 'last_indent': 4, 'min_indent': 4, 'is_magic': False, 'is_doctest': True, 'input': [1, 2], 'output': [2, 3]}, block=True, hidden=False),
 Token(type='code_block', tag='code', nesting=0, attrs={}, map=[4, 8], level=0, children=None, content='    >>> assert False\n    Traceback (most recent call last):\n    ...\n    AssertionError\n', markup='', info='', meta={'first_indent': 4, 'last_indent': 4, 'min_indent': 4, 'is_magic': False, 'is_doctest': True, 'input': [4, 5], 'output': [5, 8]}, block=True, hidden=False)]

get_example_from_token translates a markdown token to a doctest

    def get_example_from_token(token) -> typing.Iterable[doctest.Example]:
        """Yield the doctest.Example described by a markdown token.

        The token content is matched against doctest's own example pattern, so
        prompt detection follows the standard library parser.

        Args:
            token: an object exposing `content` (text that begins with a
                doctest prompt line) and `map` (its first item is used as the
                example's line number).

        Yields:
            A single doctest.Example, or nothing at all when the content does
            not begin with a doctest prompt.
        """
        # NOTE: _EXAMPLE_RE and _EXCEPTION_RE are private attributes of
        # doctest.DocTestParser; they are reused here to stay in step with it.
        m = doctest.DocTestParser._EXAMPLE_RE.match(token.content)
        if m is None:
            # Not a doctest after all: produce no examples instead of failing
            # with an AttributeError on None.
            return
        prefix = m.group("indent")

        # Split on "\n" and re-join so that a bare "..." continuation line
        # still contributes an empty source line; slicing a kept line ending
        # away would silently glue the neighbouring lines together.
        source = "\n".join(
            line.lstrip()[4:] for line in m.group("source").split("\n")
        )

        # Remove exactly the prompt's indentation from the expected output so
        # output that is itself indented keeps its leading spaces. Fall back to
        # dedent when a line is indented less than the prompt.
        raw_want = m.group("want")
        want_lines = raw_want.split("\n")
        if all(line.startswith(prefix) for line in want_lines if line):
            want = "\n".join(line[len(prefix):] for line in want_lines)
        else:
            want = dedent(raw_want)

        exc = doctest.DocTestParser._EXCEPTION_RE.match(want)
        yield doctest.Example(
            source=source,
            want=want,
            lineno=token.map[0],
            exc_msg=exc.group("msg") if exc else None,
            indent=len(prefix),
        )

get_examples_from_tokens aggregates the doctest.Examples

    def get_examples_from_tokens(tokens) -> typing.Iterable[doctest.Example]:
        """Lazily yield the examples of every token whose meta marks it as a doctest."""
        flagged = (tok for tok in tokens if tok.meta.get("is_doctest"))
        for tok in flagged:
            yield from get_example_from_token(tok)

finally we generate a unittest.TestSuite

    def get_suite_from_tokens(tokens) -> unittest.TestSuite:
        """Build a unittest.TestSuite holding one DocTestCase per doctest example.

        Args:
            tokens: the tokens handed on to get_examples_from_tokens.

        Returns:
            A TestSuite in which every example is wrapped in its own
            single-example DocTest and run with ELLIPSIS matching enabled.
        """
        suite = unittest.TestSuite()
        for example in get_examples_from_tokens(tokens):
            # DocTestCase.tearDown clears the DocTest's globs. Give every test
            # its own copy of the namespace so running the suite cannot wipe
            # (or be polluted through) the live module globals.
            test = doctest.DocTest(
                [example], globals().copy(), __name__, None, example.lineno, None
            )
            suite.addTest(doctest.DocTestCase(test, optionflags=doctest.ELLIPSIS))
        return suite
    # Build the suite once at module level; it doubles as run_suite's default.
    suite = get_suite_from_tokens(tokens)

    def run_suite(suite=suite) -> unittest.TestResult:
        """Run suite into a fresh unittest.TestResult and return that result."""
        result = unittest.TestResult()
        suite.run(result)
        return result

run our generated doctest suite to verify that it doesn't fail.

    # Run the generated suite; the returned TestResult is shown as the output below.
    run_suite(suite)
<unittest.result.TestResult run=2 errors=0 failures=0>