post processing html accessibility¤
this demo illustrates how an nbconvert
template's html can be edited directly
using BeautifulSoup
these concepts intersect multiple outstanding issues: * https://github.com/Iota-School/notebooks-for-all/issues/19#issuecomment-1251245078 * https://github.com/Iota-School/notebooks-for-all/issues/15 * https://github.com/Iota-School/notebooks-for-all/issues/20
import nbconvert_html5
from bs4 import BeautifulSoup
from pathlib import Path
jupyter selectors¤
we need to collect these across representations: nbconvert (lab/classic), nbviewer, sphinx, mkdocs
# CSS selector groups used to locate parts of the notebook in exported html.
# Most groups list two comma-separated alternatives (one of them a `.jp-*`
# class), so a single select call matches either form of markup.
# NOTE(review): presumably the non-`jp` forms are the classic template and the
# `.jp-*` forms the lab template - confirm against the templates in use.
MAIN = "#notebook, .jp-Notebook"  # notebook container, renamed to <main> by set_main
CELL = ".cell, .jp-Cell"  # any cell; not referenced by the functions visible here
CODE = ".code_cell, .jp-CodeCell"  # code cells, handled by set_cells
MD = ".text_cell, .jp-MarkdownCell"  # markdown cells, handled by set_cells
OUT = ".output, .jp-OutputArea.jp-Cell-outputArea"  # output area, used by set_displays
IN = ".code_cell .input .input_area, .jp-Editor"  # code input area, used by set_inputs
PROMPT = ".input_prompt"  # input prompt, used by set_prompts; no `.jp-*` alternative is listed
the Html5
exporter¤
currently the class does not change anything, but it exposes an api to directly modify the exported html.
# Export the notebook with the Html5 exporter and no post processor; this html
# is the baseline for the later comparison.
# NOTE(review): `[0]` presumably picks the html body out of a (body, resources)
# pair - confirm against the exporter's return value.
old = nbconvert_html5.Html5().from_filename("2022-10-25-static-notebook-tags.ipynb")[0]
# Write the baseline html to disk so it can be opened by file path later on.
source = Path("indexed-source.html"); source.write_text(old)
jupyter
remediations for landmarks¤
we are exploring the efficacy of html5 conventions to provide accessibility landmarks in the jupyter notebook. we'll modify: * the primary container * cell inputs and outputs * execution counts
def set_notebook(soup):
    """Apply every landmark remediation to the parsed document, in place.

    The setters run in a fixed order: main container, cells, code inputs,
    then prompts. Nothing is returned; ``soup`` itself is modified.
    """
    for remediate in (set_main, set_cells, set_inputs, set_prompts):
        remediate(soup)
def get_html(x, **k):
    """Return ``x`` as html text after the notebook remediations are applied.

    ``x`` is parsed with BeautifulSoup's ``lxml`` parser, modified in place by
    ``set_notebook`` and serialized back to a string. Extra keyword arguments
    are accepted and ignored.
    """
    document = BeautifulSoup(x, features="lxml")
    set_notebook(document)
    return str(document)
the setters
¤
def set_main(soup):
    """Promote the notebook container to a ``<main>`` landmark.

    The first element matching ``MAIN`` has its ``tabindex`` attribute removed
    (when present) and its tag renamed to ``main``. A document without a
    matching container is left unchanged instead of raising
    ``AttributeError``.
    """
    e = soup.select_one(MAIN)
    if e is None:
        # select_one returns None when nothing matches, e.g. html produced by
        # a template whose container uses neither selector.
        return
    e.attrs.pop("tabindex", None)
    e.name = "main"
def set_main_aside(soup):
    """Placeholder for moving the notebook metadata to the top.

    Not implemented; the idea is tracked in
    https://github.com/Iota-School/notebooks-for-all/issues/21
    """
    return None
def set_cells(soup):
    """Remediate every code cell, then every markdown cell, in place.

    Elements matching ``CODE`` are passed to ``set_code_cell`` first; elements
    matching ``MD`` are passed to ``set_md_cell`` afterwards.
    """
    handlers = ((CODE, set_code_cell), (MD, set_md_cell))
    for selector, remediate in handlers:
        for cell in soup.select(selector):
            remediate(cell)
def set_code_cell(e):
    """Turn a code cell element into a labelled ``<article>``.

    The tag is renamed to ``article`` and an ``aria-label`` of "code cell" is
    added unless the element already carries an ``aria-label``.
    """
    e.name = "article"
    # NOTE: with multiple kernels or cell magics the input might vary from
    # cell to cell, so the generic label is only a default.
    if "aria-label" not in e.attrs:
        e.attrs["aria-label"] = "code cell"
def set_md_cell(e):
    """Turn a markdown cell element into a labelled ``<article>``.

    The tag is renamed to ``article``; an existing ``aria-label`` is kept,
    otherwise "markdown cell" is used.
    """
    e.name = "article"
    attributes = e.attrs
    attributes["aria-label"] = attributes.get("aria-label", "markdown cell")
def set_displays(e):
    """Turn the output area of a cell into a labelled ``<section>``.

    ``e`` is a cell element. The first descendant matching ``OUT`` is renamed
    to ``section`` and given an ``aria-label`` of "code outputs" unless it
    already has one. Cells without an output area are left unchanged.
    """
    out = e.select_one(OUT)
    if out is None:
        # Nothing matched, e.g. a cell that produced no output.
        return
    out.name = "section"
    # Label the section itself; labelling the cell ``e`` would compete with
    # the cell's own "code cell" / "markdown cell" label.
    out.attrs.setdefault("aria-label", "code outputs")
def set_inputs(soup):
    """Replace each code input area with a plain ``<code><pre>`` element.

    Every element matching ``IN`` is swapped for a freshly parsed
    ``<code><pre>...</pre></code>`` fragment holding the input's text. The
    text is escaped before it is placed in the markup, so source code
    containing ``<``, ``>`` or ``&`` is kept as text instead of being parsed
    as html.
    """
    from html import escape

    for inp in soup.select(IN):
        # quote=False: the text lands in element content, not in an attribute.
        markup = f"<code><pre>{escape(inp.text, quote=False)}</pre></code>"
        # NOTE(review): html5 expects <pre><code> nesting; the original
        # <code><pre> order is kept so existing consumers see the same markup.
        inp.replace_with(BeautifulSoup(markup, features="lxml").select_one("code"))
def set_prompts(soup):
    """Rename every input prompt element to ``<aside>``, in place.

    Background:
    https://github.com/Iota-School/notebooks-for-all/issues/20#issuecomment-1247172797
    """
    matches = soup.select(PROMPT)
    for node in matches:
        node.name = "aside"
Running the post processor¤
# Export the same notebook again, this time passing get_html as the exporter's
# post_processor so the remediations are applied to the exported html.
new = nbconvert_html5.Html5(post_processor=get_html).from_filename("2022-10-25-static-notebook-tags.ipynb")[0]
# Write the remediated html next to the baseline; the trailing semicolon keeps
# a notebook from echoing the value returned by write_text.
target = Path("indexed-target.html"); target.write_text(new);
analysis in a headless browser¤
async def get_headless(file):
    """Return the accessibility snapshot of a local html file.

    ``file`` must provide ``absolute().as_uri()`` (e.g. a ``pathlib.Path``).
    The file is opened in a headless Chromium launched from the
    "chrome-beta" channel with the AccessibilityObjectModel blink feature
    enabled, and the result of ``page.accessibility.snapshot()`` is returned.

    The browser is closed even when navigation or the snapshot raises.
    """
    import playwright.async_api
    from shlex import split

    async with playwright.async_api.async_playwright() as play:
        browser = await play.chromium.launch(
            args=split('--enable-blink-features="AccessibilityObjectModel"'),
            headless=True,
            channel="chrome-beta",
        )
        try:
            page = await browser.new_page()
            await page.goto(file.absolute().as_uri())
            # NOTE(review): Playwright documents the Accessibility API as
            # deprecated - confirm it exists in the installed version.
            return await page.accessibility.snapshot()
        finally:
            await browser.close()
comparing results¤
import pandas
# Snapshot the baseline export and load it into a frame. The top-level `await`
# needs an environment that allows it (e.g. IPython); the trailing `df` echoes
# the frame when it is the last expression of a notebook cell.
df = pandas.DataFrame(await get_headless(source)); df
# Expand each entry of the `children` column into its own columns and index
# the rows by their `role` value.
A = df.children.apply(pandas.Series).set_index("role")
# Same steps for the post-processed export.
B = pandas.DataFrame(await get_headless(target)).children.apply(pandas.Series).set_index("role")
# Show both tables transposed, each preceded by a text label.
# NOTE(review): `display` is presumably the IPython builtin - it is not imported here.
display("old", A.T, "new", B.T)
usage in manual testing¤
nbconvert_html5
has the hooks to work with jupyter's
normal command line tool.
%%file jupyter_nbconvert_config.py
# Configuration that registers get_html as the post processor for exporters.
from unittest.mock import Mock
# Reuse the config object `c` when one is already defined locally; otherwise
# fall back to a Mock so the assignments below still run.
c = locals().get("c", Mock())
# Import get_html from a notebook through importnb.
# NOTE(review): presumably `__static_notebook_tags` resolves to
# 2022-10-25-static-notebook-tags.ipynb - confirm importnb's name matching.
with __import__("importnb").Notebook():
    from __static_notebook_tags import get_html
c.TemplateExporter.post_processor = get_html
# or put your methods in here.
# what are the A/B tests today?
# Run the command line export only when this is the main module and no
# `__file__` name is defined locally.
# NOTE(review): presumably this skips the export when the notebook is imported
# as a module - confirm. The `!` line is an IPython shell escape.
if __name__ == "__main__" and "__file__" not in locals():
    !jupyter nbconvert --to html5 --stdout 2022-10-25-static-notebook-tags.ipynb