adding aria to long or wide tables
sometimes tables are large and truncated by default to save screen and disc real estate. when this happens, we break the meaning of the columns/row ordering to assistive technology. we need to supplement the table elements with aria to ensure an accessible experience.
we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.
import pandas, bs4, enum, numpy, midgy, functools
# Register `str` as the text/html formatter for BeautifulSoup objects, so a soup
# displayed in the notebook is rendered from its string (markup) form.
get_ipython().display_formatter.formatters["text/html"].for_type(bs4.BeautifulSoup, str);
%%
<style>
:is(.jp-OutputArea-output.jp-RenderedHTMLCommon, .nb-outputs) :is(td,th) {
border: 1px solid;
}
</style>
get an aria marked up, non-uniform table.
def get_table(df, ARIA=True, caption=None):
    """Build an HTML table for ``df`` that is truncated like the pandas repr.

    The frame is considered wide/long when its size (plus one) exceeds the
    ``display.max_columns`` / ``display.max_rows`` pandas options.  When it is
    truncated, or when ``ARIA`` is truthy, the ``table`` element records the
    full logical size through ``aria-colcount`` / ``aria-rowcount``.

    Parameters:
        df: the pandas DataFrame to render.
        ARIA: when truthy, annotate the table with aria counts and indexes
            even if nothing is truncated.
        caption: optional text placed at the start of the ``caption`` element,
            before the summary produced by ``get_caption``.

    Returns:
        A ``bs4.BeautifulSoup`` document containing the single ``table``.
    """
    soup = bs4.BeautifulSoup(features="lxml")
    max_columns = pandas.options.display.max_columns
    max_rows = pandas.options.display.max_rows
    # A limit of None means "unlimited" in pandas (and 0 asks for terminal
    # auto-detection), so a falsy limit never truncates instead of raising a
    # TypeError on the comparison or hiding every column.
    WIDE = bool(max_columns) and (df.shape[1] + 1) > max_columns
    LONG = bool(max_rows) and (df.shape[0] + 1) > max_rows
    col_ranges, row_ranges = get_ranges(df, WIDE, LONG)
    table = new(
        "table",
        colcount=row_major_at_cols(df) if ARIA or WIDE else None,
        rowcount=row_major_at_rows(df) if ARIA or LONG else None,
    )
    soup.append(table)
    cap = new("caption", caption)
    table.append(cap)
    cap.append(get_caption(df))
    get_thead(df, table, col_ranges, WIDE, ARIA, LONG)
    get_tbody(df, table, col_ranges, row_ranges, WIDE, ARIA, LONG)
    return soup
def _heading_label(name, fallback):
    """Return ``str(name)``, or ``fallback`` when the level is unnamed or its label is empty."""
    # str(None) is the truthy string "None", so unnamed levels are handled explicitly.
    return (str(name) if name is not None else "") or fallback


def get_thead(df, table, col_ranges, WIDE=False, ARIA=False, LONG=False):
    """Append the heading rows of ``df`` to ``table``, one ``tr`` per column level.

    The first heading row carries one heading per index level (spanning all
    heading rows) when the index is named or the columns are unnamed.  Every
    heading row carries the column level name when any column level is named,
    followed by the visible column labels.  Between the two visible column
    ranges a ``HIDDEN`` placeholder heading is emitted whose ``aria-colspan``
    is the number of skipped columns.

    Parameters:
        df: the pandas DataFrame being rendered.
        table: the ``table`` tag that receives the rows.
        col_ranges: pair of ranges (leading, trailing) of visible column positions.
        WIDE: truthy when columns are truncated; trailing columns then keep
            their ``aria-colindex`` even without ``ARIA``.
        ARIA: truthy to emit ``aria-rowindex`` / ``aria-colindex`` on headings.
        LONG: accepted for signature compatibility; heading rows always precede
            the row gap, so it does not affect them.
    """
    ROWS, COLS = any(df.index.names), any(df.columns.names)
    leading, trailing = col_ranges
    col_center = trailing.start - leading.stop
    # Number of table columns in front of the data: the index levels plus the
    # column-name heading column, counted the same way as the data cells.
    offset = df.index.nlevels + int(ROWS and COLS)
    # NOTE(review): the HIDDEN placeholder is emitted even when no column is
    # skipped (col_center == 0); get_tbody does the same, so the two must be
    # changed together if that is not intended.
    for col_level, col_name in enumerate(df.columns.names):
        # Heading rows belong to no row part, so only ARIA decides the row index
        # (the previous `LONG and row_part` test referenced an undefined name).
        table.append(tr := trow(rowindex=col_level + 1 if ARIA else None))
        if not col_level and (ROWS or not COLS):
            for row_level, row_name in enumerate(df.index.names):
                tr.append(theading(
                    _heading_label(row_name, F"index {row_level}"), scope="col",
                    rowspan=df.columns.nlevels if df.columns.nlevels > 1 else None,
                    colindex=row_level + 1 if ARIA else None))
        if COLS:
            tr.append(theading(
                _heading_label(col_name, F"level {col_level}"), scope="row",
                colindex=df.index.nlevels + 1 if ARIA else None))
        level_values = df.columns.get_level_values(col_level)
        for col_part, col_range in enumerate(col_ranges):
            if col_part:
                # First skipped column, 1-based; derived from the range bound so
                # it is defined even when the leading range is empty.
                tr.append(theading(
                    HIDDEN, colindex=leading.stop + 1 + offset if ARIA else None,
                    **{"aria-colspan": col_center}))
            for col_index in col_range:
                tr.append(theading(
                    str(level_values[col_index]), scope="col",
                    colindex=offset + col_index + 1 if ARIA or (WIDE and col_part) else None))
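one of the confusions that will be encountered is that the natural row and column indexing of tidy data frames is inconsistent with the indexing announced by assistive technology. assistive technology indexes tables starting from 1 and counts the heading rows and columns, while positional indexing of a data frame starts from 0 and counts only the data.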
def row_major_at_rows(df):
    """Return the total table row count: one heading row per column level plus the data rows."""
    heading_rows = df.columns.nlevels
    data_rows = df.shape[0]
    return heading_rows + data_rows
def row_major_at_cols(df):
    """Return the total table column count.

    Counts one column per index level, one extra column when any column level
    is named, and one column per data column.
    """
    name_column = 1 if any(df.columns.names) else 0
    return df.index.nlevels + name_column + df.shape[1]
we present a potential solution: provide more information in the caption that may alert screen reader users to the mismatch.
def get_caption(df):
    """Build a description list summarising the shape of ``df`` for the table caption.

    The outer list reports the number of rows and columns; a nested list under
    "indexes:" reports the number of row and column index levels.  Both lists
    carry ``role="presentation"``.
    """
    summary = new("dl", role="presentation")
    for term, detail in (("rows", len(df)), ("columns", len(df.columns))):
        summary.append(new("dt", term))
        summary.append(new("dd", str(detail)))

    summary.append(new("dt", "indexes:"))
    levels = new("dl", role="presentation")
    summary.append(new("dd", levels))
    for term, detail in (("rows", df.index.nlevels), ("columns", df.columns.nlevels)):
        levels.append(new("dt", term))
        levels.append(new("dd", str(detail)))
    return summary
iterate through the rows of data and append them to the table
def get_tbody(df, table, col_ranges, row_ranges, WIDE=False, ARIA=False, LONG=False):
    """Append the data rows of ``df`` to ``table``, including placeholders for hidden parts.

    Rows are emitted for each visible row range.  Before the second range a
    placeholder row is inserted whose cells contain ``HIDDEN`` and whose
    ``aria-rowspan`` is the number of skipped rows.  Within every row a
    ``HIDDEN`` cell with ``aria-colspan`` marks the skipped columns, and an
    ``EMPTY`` cell sits under the column-name heading when both the index and
    the columns are named.

    Parameters:
        df: the pandas DataFrame being rendered.
        table: the ``table`` tag that receives the rows.
        col_ranges: pair of ranges (leading, trailing) of visible column positions.
        row_ranges: pair of ranges (leading, trailing) of visible row positions.
        WIDE, LONG: accepted for signature compatibility; not read here.
        ARIA: only controls ``aria-colindex`` on the row headings of visible
            rows; all other row/column indexes are always emitted.
    """
    ROWS, COLS = any(df.index.names), any(df.columns.names)
    row_center = row_ranges[1].start - row_ranges[0].stop
    col_center = col_ranges[1].start - col_ranges[0].stop
    index_levels = df.index.nlevels
    spacer = ROWS and COLS
    # Table columns in front of the data cells: index levels plus the spacer.
    offset = index_levels + int(spacer)
    # 1-based positions of the first skipped row/column.  They are computed from
    # the range bounds rather than from loop variables left over by a previous
    # loop, which are unbound when the leading range is empty.
    hidden_row = row_ranges[0].stop + 1 + df.columns.nlevels
    hidden_col = col_ranges[0].stop + 1 + offset
    spacer_col = index_levels + 1
    # Look the index labels up once per level instead of once per row.
    level_values = [df.index.get_level_values(level) for level in range(index_levels)]
    # NOTE(review): the HIDDEN column cell is emitted even when no column is
    # skipped (col_center == 0); get_thead does the same, so the two must be
    # changed together if that is not intended.
    for row_part, row_range in enumerate(row_ranges):
        if row_part:
            # Placeholder row standing in for every skipped row.
            table.append(tr := trow(rowindex=hidden_row, **{"aria-rowspan": row_center}))
            for row_level in range(index_levels):
                tr.append(theading(HIDDEN, colindex=row_level + 1))
            if spacer:
                tr.append(tdata(EMPTY, colindex=spacer_col))
            for col_part, col_range in enumerate(col_ranges):
                if col_part:
                    # Corner cell covering skipped rows and skipped columns at once.
                    tr.append(tdata(
                        HIDDEN, colindex=hidden_col,
                        **{"aria-rowspan": row_center, "aria-colspan": col_center}))
                for col_index in col_range:
                    tr.append(tdata(HIDDEN, colindex=col_index + 1 + offset))
        for row_index in row_range:
            table.append(tr := trow(rowindex=row_index + 1 + df.columns.nlevels))
            for row_level in range(index_levels):
                tr.append(theading(
                    str(level_values[row_level][row_index]),
                    colindex=row_level + 1 if ARIA else None, scope="row"))
            if spacer:
                tr.append(tdata(EMPTY, colindex=spacer_col))
            for col_part, col_range in enumerate(col_ranges):
                if col_part:
                    tr.append(tdata(HIDDEN, colindex=hidden_col, **{"aria-colspan": col_center}))
                for col_index in col_range:
                    tr.append(tdata(
                        str(df.iloc[row_index, col_index]),
                        colindex=col_index + 1 + offset))
# Bare expression reading the current pandas truncation limits; it has no effect
# as a statement and presumably exists to echo the values as notebook cell output.
pandas.options.display.max_columns, pandas.options.display.max_rows
determine the ranges of values that are presented to the visitor.
this work implements special logic for spanning multiple rows and columns for truncated data.
def get_frame_bounds(df, WIDE=False, LONG=False):
    """Return ``(col_head, col_tail, row_head, row_tail)`` for the visible parts of ``df``.

    ``col_head`` / ``row_head`` are the exclusive ends of the leading visible
    ranges and ``col_tail`` / ``row_tail`` the starts of the trailing ones.
    Without truncation all four equal the axis length.  With ``WIDE`` (``LONG``)
    the head is half of ``display.max_columns`` (``display.max_rows``), floored,
    and the tail leaves the same number of entries at the end.
    """
    n_cols, n_rows = len(df.columns), len(df)
    col_head = col_tail = n_cols
    row_head = row_tail = n_rows
    if WIDE:
        col_head = pandas.options.display.max_columns // 2
        col_tail = n_cols - col_head
    if LONG:
        row_head = pandas.options.display.max_rows // 2
        row_tail = n_rows - row_head
    return col_head, col_tail, row_head, row_tail
def get_ranges(df, WIDE=False, LONG=False):
    """Return the visible column ranges and row ranges of ``df``.

    The result is ``((leading_cols, trailing_cols), (leading_rows, trailing_rows))``
    where each element is a ``range`` of positions, built from the bounds
    computed by ``get_frame_bounds``.
    """
    col_head, col_tail, row_head, row_tail = get_frame_bounds(df, WIDE=WIDE, LONG=LONG)
    n_rows, n_cols = df.shape[0], df.shape[1]
    col_ranges = (range(col_head), range(col_tail, n_cols))
    row_ranges = (range(row_head), range(row_tail, n_rows))
    return col_ranges, row_ranges
to provide the proper markup for large tables we need to be rigorous about the use of ARIA: `rowindex`, `colindex`, `rowspan`, and `colspan`.
def new(tag,
        string=None, rowindex=None, colindex=None, rowcount=None, colcount=None,
        rowspan=None, colspan=None, scope=None,
        *, soup=bs4.BeautifulSoup(features="lxml"), **attrs):
    """Create a new tag with optional table and aria attributes.

    Truthy ``rowindex``, ``colindex``, ``rowcount`` and ``colcount`` become
    ``aria-``-prefixed attributes; truthy ``rowspan``, ``colspan`` and ``scope``
    become plain attributes.  Any extra keyword arguments are passed through as
    attributes.  A truthy ``string`` is appended as the tag's content.  The
    ``soup`` default is created once and only used as the tag factory.
    """
    aria_values = (
        ("rowindex", rowindex),
        ("colindex", colindex),
        ("rowcount", rowcount),
        ("colcount", colcount),
    )
    plain_values = (("rowspan", rowspan), ("colspan", colspan), ("scope", scope))
    # Falsy values (None, 0, "") are left out, in both groups.
    for key, value in aria_values:
        if value:
            attrs[F"aria-{key}"] = value
    for key, value in plain_values:
        if value:
            attrs[key] = value
    element = soup.new_tag(tag, attrs=attrs)
    if string:
        element.append(string)
    return element
# Tag factories for table rows ("tr"), heading cells ("th") and data cells ("td").
trow, theading, tdata = (
    functools.partial(new, tag_name) for tag_name in ("tr", "th", "td")
)
sample data
# Three-level MultiIndex (2 x 3 x 3 = 18 entries) with level names J, K, L.
index = pandas.MultiIndex.from_product([["A", "Z"], ["M", "N", "O"], [1, 2, 3]], names=[*"JKL"])
# Empty frame using that index on both axes; the column levels are renamed to
# 10, 100, 1000 and only the leading rows returned by head() are kept.
(df := pandas.DataFrame(columns=index, index=index).rename_axis(columns=[10, 100, 1000]).head())
# Variant with the two outer levels dropped on each axis and the axis names removed.
single = df.droplevel((0, 1), 0).droplevel((0, 1), 1).rename_axis(None, axis=1).rename_axis(None, axis=0)
# Tile df 10 times across the columns and 20 times down the rows.
wide = pandas.concat([pandas.concat([df]*10, axis=1)]*20)
# The option context is bound to `options` and used again further down the file.
with (options := pandas.option_context("display.max_rows", 4, "display.max_columns", 4)):
display(wide)
%%
<style>
/**inline flex the dl block display and force it cause jupyter is aggressive.**/
table>caption dl {
display: inline-flex !important;
justify-content: flex-start;
}
/** add punctuation after each of the descriptors. **/
table>caption dl>dd {
&::after {content: ", "}; &:last-child::after {content: " "};
}
/**unset some jupyter nonsense**/
dl > dt, dl > dd {
width: unset !important;
float: unset !important;
padding-right: 1rem !important;
}
</style>
HIDDEN and EMPTY are used for visual verification of the technique.
# Placeholder texts rendered in cells that stand in for hidden or empty content.
HIDDEN, EMPTY = "hidden", "empty"
# The caption must be passed by keyword: the second positional parameter of
# get_table is ARIA, so a positional string was treated as the ARIA flag and
# the caption text was never rendered.
with options:
    display(get_table(wide, caption="a smaller table representation with hidden rows and columns."))
with pandas.option_context("display.max_rows", 10, "display.max_columns", 10):
    display(get_table(wide))
notes/discussion
an array may be a special case of a table where headings aren't needed. a table with no row or column headings will reveal a nice array.
this example covers the most complex axis case and does not include grouping.