# post processing html accessibility

this demo illustrates how an nbconvert template's html can be edited directly using BeautifulSoup

these concepts intersect multiple outstanding issues:

* https://github.com/Iota-School/notebooks-for-all/issues/19#issuecomment-1251245078
* https://github.com/Iota-School/notebooks-for-all/issues/15
* https://github.com/Iota-School/notebooks-for-all/issues/20

    import nbconvert_html5
    from bs4 import BeautifulSoup
    from pathlib import Path

## jupyter selectors

we need to collect these across representations: nbconvert (lab/classic), nbviewer, sphinx, mkdocs

    # css selectors; each pairs the classic notebook class names with the
    # jupyterlab (`jp-*`) ones so that either flavour of exported html is matched.
    MAIN = "#notebook, .jp-Notebook"  # the primary notebook container
    CELL = ".cell, .jp-Cell"  # any cell
    CODE = ".code_cell, .jp-CodeCell"  # code cells
    MD = ".text_cell, .jp-MarkdownCell"  # markdown cells
    OUT = ".output, .jp-OutputArea.jp-Cell-outputArea"  # the output area of a cell
    IN = ".code_cell .input .input_area, .jp-Editor"  # the source input of a cell
    # the classic class alone never matches lab-style markup, so the lab
    # input prompt class is listed too, like the selectors above
    PROMPT = ".input_prompt, .jp-InputPrompt"

## the `Html5` exporter

currently the class does not change anything, but it exposes an api for directly modifying the exported html.

    # export the notebook without any post processing; the first item of the
    # result is the html text
    old = nbconvert_html5.Html5().from_filename("2022-10-25-static-notebook-tags.ipynb")[0]
    source = Path("indexed-source.html")
    # utf-8 is given explicitly so the write does not depend on the platform
    # default encoding
    source.write_text(old, encoding="utf-8")

## `jupyter` remediations for landmarks

we are exploring the efficacy of html5 conventions to provide accessibility landmarks in the jupyter notebook. we'll modify:

* the primary container
* cell inputs and outputs
* execution counts

    def set_notebook(soup):
        """apply the landmark remediations to the parsed document `soup`, in place.

        the main, cell, input and prompt setters run in that order.
        """
        set_main(soup)
        set_cells(soup)
        set_inputs(soup)
        set_prompts(soup)

    def get_html(x, **k):
        """post process exported html.

        `x` is parsed with the lxml parser, remediated by `set_notebook`, and
        returned as a string. extra keyword arguments are accepted and ignored.
        """
        document = BeautifulSoup(x, features="lxml")
        set_notebook(document)
        return str(document)

## the `setters`

    def set_main(soup):
        """turn the first element matching `MAIN` into a `<main>` landmark.

        the element's `tabindex` attribute is dropped when present. when nothing
        matches `MAIN` the document is left untouched instead of raising.
        """
        e = soup.select_one(MAIN)
        if e is None:
            # `select_one` found no notebook container in this markup
            return
        e.attrs.pop("tabindex", None)
        e.name = "main"

    def set_main_aside(soup):
        """[Move Metadata to the top](https://github.com/Iota-School/notebooks-for-all/issues/21)

        placeholder: nothing is implemented yet, `soup` is left untouched."""
        return None

    def set_cells(soup):
        """remediate every code cell, then every markdown cell, found in `soup`."""
        for selector, remediate in ((CODE, set_code_cell), (MD, set_md_cell)):
            for cell in soup.select(selector):
                remediate(cell)

    def set_code_cell(e):
        """make `e` an `<article>` labelled "code cell" unless it already has an aria-label."""
        e.name = "article"
        # an existing label wins: with several kernels or cell magics the input
        # may need a more specific one (NOTE(review): confirm this intent)
        if "aria-label" not in e.attrs:
            e.attrs["aria-label"] = "code cell"

    def set_md_cell(e):
        """make `e` an `<article>` labelled "markdown cell" unless it already has an aria-label."""
        e.name = "article"
        if "aria-label" not in e.attrs:
            e.attrs["aria-label"] = "markdown cell"

    def set_displays(e):
        """introduces a section tag to the outputs

        the first element inside `e` matching `OUT` becomes a `<section>` labelled
        "code outputs"; an aria-label it already carries is kept. when nothing
        inside `e` matches `OUT` the element is left untouched.
        """
        out = e.select_one(OUT)
        if out is None:
            # no output area found in this element
            return
        out.name = "section"
        # the label belongs on the output section itself, not on the enclosing element
        out.attrs.setdefault("aria-label", "code outputs")

    def set_inputs(soup):
        """replace each element matching `IN` with a `<code><pre>` pair holding its text.

        the text is html-escaped before being parsed, so source containing `<`,
        `>` or `&` stays literal text instead of being read as markup.
        """
        from html import escape

        for inp in soup.select(IN):
            markup = f"<code><pre>{escape(inp.text)}</pre></code>"
            # the fragment is parsed into a full document; only its `code` element is kept
            inp.replace_with(BeautifulSoup(markup, features="lxml").select_one("code"))

    def set_prompts(soup):
        """https://github.com/Iota-School/notebooks-for-all/issues/20#issuecomment-1247172797

        rename every element matching `PROMPT` to `<aside>`."""
        for tag in soup.select(PROMPT):
            tag.name = "aside"

## Running the post processor

    # export again, this time passing `get_html` as the post processor
    new = nbconvert_html5.Html5(post_processor=get_html).from_filename("2022-10-25-static-notebook-tags.ipynb")[0]
    target = Path("indexed-target.html")
    # utf-8 is given explicitly so the write does not depend on the platform
    # default encoding; the trailing semicolon is kept from the original cell
    target.write_text(new, encoding="utf-8");

## analysis in a headless browser

    async def get_headless(file):
        """open `file` in headless chrome (beta channel) and return the page's accessibility snapshot.

        `file` needs `absolute()` and `as_uri()` (e.g. a `pathlib.Path`); the page
        is loaded from that uri. the browser is closed even when loading the page
        or taking the snapshot fails.
        """
        import playwright.async_api
        from shlex import split
        async with playwright.async_api.async_playwright() as play:
            browser = await play.chromium.launch(
                args=split('--enable-blink-features="AccessibilityObjectModel"'),
                headless=True,
                channel="chrome-beta"
            )
            try:
                page = await browser.new_page()
                await page.goto(file.absolute().as_uri())
                data = await page.accessibility.snapshot()
            finally:
                await browser.close()
        return data

## comparing results

import pandas
df = pandas.DataFrame(await get_headless(source)); df



A = df.children.apply(pandas.Series).set_index("role")
B = pandas.DataFrame(await get_headless(target)).children.apply(pandas.Series).set_index("role")
display("old", A.T, "new", B.T)

## usage in manual testing

nbconvert_html5 has the hooks to work with jupyter's normal command line tool.

    %%file jupyter_nbconvert_config.py
    from unittest.mock import Mock    
    # reuse `c` when it is already defined, otherwise fall back to a Mock so the
    # attribute assignment below still works
    # (presumably jupyter supplies `c` as its config object; TODO confirm)
    c = locals().get("c", Mock()) 
    # presumably the importnb context lets the import below load a notebook;
    # NOTE(review): verify that this module name resolves to the intended notebook
    with __import__("importnb").Notebook():
        from __static_notebook_tags import get_html
    # register `get_html` as the post processor on the template exporter config
    c.TemplateExporter.post_processor = get_html

    # or put your methods in here. 
    # what are the A/B tests today?

Writing jupyter_nbconvert_config.py

    # runs only as the main module and when no `__file__` is defined
    # (presumably an interactive notebook session; confirm)
    if __name__ == "__main__" and "__file__" not in locals():
        # shell escape: convert the notebook to html5 on stdout with the jupyter command line
        !jupyter nbconvert --to html5 --stdout 2022-10-25-static-notebook-tags.ipynb

[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.