invoking the ibm equal access checker from python
the ibm equal access checker has some really weird opinions about how it wants to be used. it makes me appreciate axe a bit more. anyway, let's figure out how to negotiate with it.
import asyncio
import json
import os
import shutil
import subprocess
from pathlib import Path
import anyio
from pandas import DataFrame

async def achecker_run(file, print=True, stdout=None, stderr=None):
    """run the equal access checker with node; the results are written to disk."""
    file = Path(file)
    if not file.exists():
        raise FileNotFoundError(file)
    npx = shutil.which("npx")
    if not print:
        # capture output instead of streaming it to the console
        stdout = stdout or subprocess.PIPE
        stderr = stderr or subprocess.PIPE
    proc = await asyncio.create_subprocess_shell(
        F"{npx} achecker --outputFormat json --outputFolder achecker {file}",
        stdout=stdout, stderr=stderr)
    await proc.communicate()
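as a smoke test, the runner can be awaited directly in a notebook cell; `sample.html` here is a hypothetical fixture, not a file from this project:
if "__file__" not in locals():
    # hypothetical scan; the report lands under achecker/, mirroring the file's absolute path
    await achecker_run("sample.html")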
# list summary files newest-first, one "<mtime> <path>" line per file
FIND_SUMMARY_JSON = """find {} -type f -name "summary_*.json" -printf "%Tc %p\n" | sort -r"""
async def achecker(file, dir="achecker", print=False, summary_only=False, stdout=None, stderr=None):
    # run the checker, which saves everything to disk
    await achecker_run(file, print=print, stdout=stdout, stderr=stderr)
    # then load the results of the checker back from disk
    return await (summary_only and find_achecker_summary or find_achecker_results)(file, dir)
async def find_achecker_results(file, dir="achecker"):
    """the results are stored in a prefixed directory nested by the file's absolute path"""
    result = anyio.Path(dir, *os.path.abspath(file).split(os.sep))
    if await result.is_dir():
        # a directory was scanned; gather every json report beneath it
        files = list(Path(result).rglob("*.json"))
        return dict(zip(files, map(json.loads, await asyncio.gather(
            *(file.read_text() for file in map(anyio.Path, files))
        ))))
    # a single file was scanned; its report sits at the mirrored path plus .json
    result = result.with_suffix(result.suffix + ".json")
    return json.loads(await result.read_text())
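concretely, the lookup mirrors the scanned file's absolute path under the output folder. a sketch of the path arithmetic with a hypothetical posix input:
# hypothetical input: scanning /home/me/page.html with --outputFolder achecker
parts = os.path.abspath("/home/me/page.html").split(os.sep)  # ['', 'home', 'me', 'page.html']
report = anyio.Path("achecker", *parts)                      # achecker/home/me/page.html
report = report.with_suffix(report.suffix + ".json")         # achecker/home/me/page.html.json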
async def find_achecker_summaries(dir="achecker"):
    """achecker creates a summary file for each run. we start with the most recent summary and work backwards."""
    proc = await asyncio.create_subprocess_shell(FIND_SUMMARY_JSON.format(dir), stdout=subprocess.PIPE)
    out, err = await proc.communicate()
    return out.splitlines()
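each line from the find pipeline is a `%Tc` timestamp followed by the path to a `summary_*.json` file, and the `rsplit(maxsplit=1)` below peels the path off the end. the only field the lookup relies on is the summary's `pageScanSummary` list of labeled scans; this sketch is illustrative, not the full schema:
example_summary = {
    # illustrative only; real summaries carry counts, rule details, and more
    "pageScanSummary": [{"label": "/home/me/page.html"}],
}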
async def find_achecker_summary(file, dir="achecker"):
    for summary in await find_achecker_summaries(dir):
        # work through the newest summaries to find the one that matches the file
        summary = json.loads(await anyio.Path(summary.rsplit(maxsplit=1)[1].decode()).read_text())
        for scan in summary.get("pageScanSummary", []):
            # verify the scan label refers to the same file we scanned
            if os.path.samefile(scan["label"], file):
                return summary
if MAIN := ("__file__" not in locals()):
    path = Path("../../../nbconvert-a11y/tests/exports/html/")
    results = await achecker(path, stderr=subprocess.PIPE)
show the results aggregated across the accessibility failures of multiple pages. there are over 1000 rows in this dataset, so we are well out of the territory where these results can be eyeballed manually.
if MAIN:
    # one row per individual failure; .series() expands a column of objects
    # into their own columns (a non-pandas helper defined elsewhere)
    df = DataFrame(results).T.results.explode().series()
    # the nested path data (dom and aria selectors) becomes top-level columns
    df = df.join(df.pop("path").series())
MAIN and df
over the aggregate we find about 10 distinct violation or error rules.
MAIN and df.groupby("ruleId").value.count().sort_values()
aggregating accessibility vulnerabilities over many pages is challenging. these results only demonstrate the ibm equal access checker; we still need to composite them with the axe checker and the nu validator to expand the scope of testing. once we test with many systems, we take on the added challenge of registering all of the different tools' selectors against the same parts of the document.
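a minimal sketch of that compositing, assuming an `axe_df` frame built separately from axe-core output; the `tool` and `selector` names are hypothetical conventions, and the achecker `dom` column comes from the exploded `path` data above:
if MAIN:
    # tag each tool's findings and align them on a shared selector column
    achecker_df = df.assign(tool="achecker").rename(columns={"dom": "selector"})
    # axe_df would be shaped the same way from axe-core json, then something like:
    # combined = pandas.concat([achecker_df, axe_df])
    # combined.groupby(["selector", "ruleId", "tool"]).size()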