skip to main content

@tonyfast s notebooks

site navigation
notebook summary
adding aria to long or wide tables
sometimes tables are large and truncated by default to save screen and disc real estate. when this happens, we break the meaning of the columns/row ordering to assistive technology. we need to supplement the table elements with aria to ensure an accessible experience.
26 total
14 code
executed in order
Python 3 (ipykernel)
lines of code
table of contents
{"kernelspec": {"display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3"}, "language_info": {"codemirror_mode": {"name": "ipython", "version": 3}, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3"}, "widgets": {"application/vnd.jupyter.widget-state+json": {"state": {}, "version_major": 2, "version_minor": 0}}, "title": "adding aria to long or wide tables", "description": "sometimes tables are large and truncated by default to save screen and disc real estate.\nwhen this happens, we break the meaning of the columns/row ordering to assistive technology.\nwe need to supplement the table elements with aria to ensure an accessible experience."}
notebook toolbar
cell ordering

adding aria to long or wide tables

sometimes tables are large and truncated by default to save screen and disc real estate. when this happens, we break the meaning of the columns/row ordering to assistive technology. we need to supplement the table elements with aria to ensure an accessible experience.

we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.

    import pandas, bs4, enum, numpy, midgy, functools
    # register `str` as the text/html formatter for BeautifulSoup objects, so a soup
    # returned from a cell is shown through its string form.
    get_ipython().display_formatter.formatters["text/html"].for_type(bs4.BeautifulSoup, str);
1 outputs.
/tmp/ipykernel_18771/ DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at
  import pandas, bs4, enum, numpy, midgy, functools

3 1 outputs.

get an aria marked up, non-uniform table.

    def get_table(df, ARIA=True, caption=None):
        """build an html table for ``df`` following the pandas display limits.

        the frame counts as WIDE/LONG when it has more columns/rows than
        ``pandas.options.display.max_columns`` / ``max_rows`` allow; the heading
        and body builders then receive the visible column and row ranges.

        Args:
            df: the dataframe to render.
            ARIA: when truthy, aria-colcount/aria-rowcount are always written on the
                table; otherwise only for the truncated axis.
            caption: optional text placed in the ``caption`` element.

        Returns:
            the ``bs4.BeautifulSoup`` object holding the table.
        """
        max_columns = pandas.options.display.max_columns
        max_rows = pandas.options.display.max_rows
        # pandas documents ``None`` as "unlimited" and ``0`` as "auto-detect the
        # terminal size" for these options. neither is a usable threshold: ``None``
        # fails the comparison and ``0`` would leave no visible leading range, so
        # both are treated as "do not truncate".
        WIDE = bool(max_columns) and (df.shape[1] + 1) > max_columns
        LONG = bool(max_rows) and (df.shape[0] + 1) > max_rows
        col_ranges, row_ranges = get_ranges(df, WIDE, LONG)
        soup = bs4.BeautifulSoup(features="lxml")
        table = new(
            "table",
            colcount=row_major_at_cols(df) if ARIA or WIDE else None,
            rowcount=row_major_at_rows(df) if ARIA or LONG else None,
        )
        soup.append(table)
        # the caption element is always present; it is empty when no text is given.
        table.append(new("caption", caption))
        get_thead(df, table, col_ranges, WIDE, ARIA, LONG)
        get_tbody(df, table, col_ranges, row_ranges, WIDE, ARIA, LONG)
        return soup
    def get_thead(df, table, col_ranges, WIDE=False, ARIA=False, LONG=False):
        """append one heading row per column level of ``df`` to ``table``.

        the first heading row also carries one heading per index level (spanning all
        column levels). when the column axis has names, each row gets a heading with
        the level name. a HIDDEN heading is placed between the visible column ranges.

        Args:
            df: the dataframe being rendered.
            table: the table tag that receives the ``tr`` elements.
            col_ranges: the leading and trailing ranges of visible column positions.
            WIDE: whether the columns are truncated.
            ARIA: when truthy, aria-rowindex/aria-colindex are always written.
            LONG: whether the rows are truncated.
        """
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of column positions skipped between the visible ranges
        col_center = col_ranges[1].start - col_ranges[0].stop
        # data columns follow the index-level headings, plus one extra column when
        # both axes are named; the body rows use the same offset.
        col_offset = df.index.nlevels + int(ROWS and COLS)
        for col_level, col_name in enumerate(df.columns.names):
            # the condition used to read ``ARIA or LONG and row_part``; ``row_part`` is
            # not defined in this function, so it raised NameError when ARIA was
            # falsy and LONG truthy.
            table.append(tr := trow(rowindex=col_level + 1 if ARIA or LONG else None))
            if not col_level and (ROWS or not COLS):
                for row_level, row_name in enumerate(df.index.names):
                    # str(None) is the truthy string "None", so map a missing name to
                    # "" first to let the fallback label apply.
                    row_label = "" if row_name is None else str(row_name)
                    tr.append(theading(
                        row_label or f"index {row_level}",
                        scope="col",
                        rowspan=df.columns.nlevels if df.columns.nlevels > 1 else None,
                        colindex=row_level + 1 if ARIA else None))
            if COLS:
                col_label = "" if col_name is None else str(col_name)
                tr.append(theading(
                    col_label or f"level {col_level}",
                    scope="row",
                    colindex=df.index.nlevels + 1 if ARIA else None))
            level_values = df.columns.get_level_values(col_level)
            for col_part, col_range in enumerate(col_ranges):
                if col_part:
                    # NOTE(review): this heading is emitted for the second range even
                    # when the frame is not WIDE (col_center is then 0); get_tbody
                    # mirrors that, so any change must be made in both places.
                    # last position of the preceding range, instead of relying on the
                    # loop variable left over from that range.
                    last_visible = col_ranges[col_part - 1].stop - 1
                    # the hidden heading sits one column after the last visible one;
                    # it previously added ``bool(LONG and WIDE)`` where every other
                    # cell adds ``int(ROWS and COLS)``.
                    tr.append(theading(
                        HIDDEN,
                        colindex=col_offset + last_visible + 2 if ARIA else None,
                        **{"aria-colspan": col_center}))
                for col_index in col_range:
                    tr.append(theading(
                        str(level_values[col_index]),
                        scope="col",
                        colindex=col_offset + col_index + 1 if ARIA or WIDE and col_part else None))

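one of the confusions that will be encountered is that the natural row and column indexing of tidy data frames will be inconsistent with the indexes announced by assistive technology. assistive technology indexes tables starting from 1 and counts the heading rows and index columns, whereas pandas positions start from 0 and count only the data.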

    def row_major_at_rows(df):
        """count the table rows: one heading row per column level plus one per data row."""
        heading_rows = df.columns.nlevels
        data_rows = len(df)
        return heading_rows + data_rows
    def row_major_at_cols(df):
        """count the table columns: index levels, one more if any column level is named, then the data columns."""
        has_column_names = any(df.columns.names)
        data_cols = len(df.columns)
        return df.index.nlevels + int(has_column_names) + data_cols

we present a potential solution: provide more information in the caption that may alert screen reader users to the mismatch.

    def get_caption(df):
        """describe the shape of ``df`` as a description list.

        the outer list reports the number of rows and columns; a nested list under
        "indexes:" reports the number of index levels on each axis.
        """
        summary = new("dl", role="presentation")
        for term, detail in (("rows", len(df)), ("columns", len(df.columns))):
            summary.append(new("dt", term))
            summary.append(new("dd", str(detail)))

        levels = new("dl", role="presentation")
        summary.append(new("dt", "indexes:"))
        summary.append(new("dd", levels))
        for term, detail in (("rows", df.index.nlevels), ("columns", df.columns.nlevels)):
            levels.append(new("dt", term))
            levels.append(new("dd", str(detail)))
        return summary

iterate through the rows of data and append them to the table

    def get_tbody(df, table, col_ranges, row_ranges, WIDE=False, ARIA=False, LONG=False):
        """append the data rows of ``df`` to ``table``.

        the rows of each range in ``row_ranges`` are written in order; before every
        range after the first, a single row of HIDDEN cells is written for the rows in
        between. inside each row the same is done for ``col_ranges`` with a HIDDEN cell.

        WIDE and LONG are accepted but not read in this body; ARIA only gates the
        aria-colindex of the row headings of data rows. every other aria-rowindex and
        aria-colindex is written unconditionally.
        """
        # whether any index / column level carries a truthy name
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of row / column positions between the two visible ranges
        row_center = row_ranges[1].start - row_ranges[0].stop
        col_center = col_ranges[1].start - col_ranges[0].stop
        for row_part, row_range in enumerate(row_ranges):
            if row_part:
                # placeholder row between ranges. `row_index` is the value left over
                # from the previous range's loop below, so this row is indexed one
                # past the last visible row.
                table.append(tr := trow(rowindex=row_index+2+df.columns.nlevels, **{"aria-rowspan": row_center}))
                for row_level in range(df.index.nlevels): tr.append(theading(HIDDEN,colindex=row_level+1))
                # `row_level` still holds the last index level here, so the EMPTY
                # cell lands in the column right after the index headings.
                if ROWS and COLS: tr.append(tdata(EMPTY, colindex=row_level+2))
                for col_part, col_range in enumerate(col_ranges):
                    # cell standing in for both the skipped rows and skipped columns;
                    # `col_index` is left over from the preceding range's loop.
                    if col_part: tr.append(tdata(HIDDEN, colindex=col_index+2+df.index.nlevels+int(ROWS and COLS), **{"aria-rowspan": row_center, "aria-colspan": col_center}),)
                    for col_index in col_range: tr.append(tdata(HIDDEN, colindex=col_index + 1 + df.index.nlevels + int(ROWS and COLS)))
            for row_index in row_range:
                # data rows are indexed after the heading rows, starting from 1
                table.append(tr := trow(rowindex=row_index+1+df.columns.nlevels))        
                for row_level in range(df.index.nlevels):
                    # one row heading per index level, holding that level's value
                    tr.append(theading(str(df.index.get_level_values(row_level)[row_index]), colindex=row_level+1 if ARIA else None, scope="row"))
                # filler cell, written only when both axes are named
                if ROWS and COLS: tr.append(tdata(EMPTY, colindex=row_level+2))
                for col_part, col_range in enumerate(col_ranges):
                    # placeholder cell between column ranges; `col_index` is left over
                    # from the preceding range's loop.
                    if col_part: tr.append(tdata(
                        HIDDEN, colindex=col_index+2+ df.index.nlevels + int(ROWS and COLS), **{"aria-colspan": col_center}))
                    for col_index in col_range:
                        # cell values are looked up by position and written as strings
                        tr.append(tdata(str(df.iloc[row_index, col_index]), colindex=col_index + 1 + df.index.nlevels + int(ROWS and COLS)))

pandas.options.display.max_columns and pandas.options.display.max_rows determine the ranges of values that are presented to the visitor. this work implements special logic for spanning multiple rows and columns for truncated data.

    def get_frame_bounds(df, WIDE=False, LONG=False):
        """return ``(col_head, col_tail, row_head, row_tail)`` for the visible parts of ``df``.

        without truncation each head and tail equals the axis length. when WIDE
        (or LONG) is set, the head is half of the pandas display limit for that
        axis and the tail is the axis length minus the head.
        """
        n_rows, n_cols = len(df), len(df.columns)
        col_head = col_tail = n_cols
        row_head = row_tail = n_rows
        if WIDE:
            col_head = pandas.options.display.max_columns // 2
            col_tail = n_cols - col_head
        if LONG:
            row_head = pandas.options.display.max_rows // 2
            row_tail = n_rows - row_head
        return col_head, col_tail, row_head, row_tail
    def get_ranges(df, WIDE=False, LONG=False):
        """return ``(column ranges, row ranges)``, each a (leading, trailing) pair of ranges.

        the leading range starts at 0 and the trailing range ends at the axis length;
        the cut points come from ``get_frame_bounds``.
        """
        col_head, col_tail, row_head, row_tail = get_frame_bounds(df, WIDE=WIDE, LONG=LONG)
        n_rows, n_cols = df.shape[0], df.shape[1]
        col_ranges = (range(col_head), range(col_tail, n_cols))
        row_ranges = (range(row_head), range(row_tail, n_rows))
        return col_ranges, row_ranges

to provide the proper markup for large tables we need to be rigorous about the use of ARIA: rowindex, colindex, rowspan, and colspan.

    def new(tag,
            string=None, rowindex=None, colindex=None, rowcount=None, colcount=None, rowspan=None, colspan=None, scope=None,
            *, soup=bs4.BeautifulSoup(features="lxml"), **attrs):
        """create a tag named ``tag`` carrying table and aria attributes.

        truthy ``rowindex``/``colindex``/``rowcount``/``colcount`` are written with an
        ``aria-`` prefix; truthy ``rowspan``/``colspan``/``scope`` are written under
        their own names. extra keyword arguments are passed through as attributes,
        and ``string`` (when truthy) is appended as the tag's content.
        """
        prefixed = {"rowindex": rowindex, "colindex": colindex, "rowcount": rowcount, "colcount": colcount}
        for key, value in prefixed.items():
            if value:
                attrs[f"aria-{key}"] = value
        plain = {"rowspan": rowspan, "colspan": colspan, "scope": scope}
        for key, value in plain.items():
            if value:
                attrs[key] = value
        # ``soup`` only serves as the factory for the tag; the tag is not attached to it.
        element = soup.new_tag(tag, attrs=attrs)
        if string:
            element.append(string)
        return element
    # factories for table rows, heading cells and data cells: `new` with the tag name pre-bound.
    trow, theading, tdata = (functools.partial(new, name) for name in ("tr", "th", "td"))

sample data

    # three-level index named J, K, L built from the product 2 x 3 x 3 (18 entries)
    index = pandas.MultiIndex.from_product([["A", "Z"], ["M", "N", "O"], [1, 2, 3]], names=[*"JKL"])
    # value-less frame using that index on both axes; the column levels are renamed
    # to 10, 100, 1000 and only the first five rows are kept.
    (df := pandas.DataFrame(columns=index, index=index).rename_axis(columns=[10, 100, 1000]).head())
    # same frame reduced to a single unnamed level on each axis
    single = df.droplevel((0, 1), 0).droplevel((0, 1), 1).rename_axis(None, axis=1).rename_axis(None, axis=0)
    # ten copies side by side, stacked twenty times: 100 rows x 180 columns
    wide = pandas.concat([pandas.concat([df]*10, axis=1)]*20)
    with (options := pandas.option_context("display.max_rows", 4, "display.max_columns", 4)):
1 outputs.
10 A ... Z
100 M ... O
1000 1 2 ... 2 3
A M 1 NaN NaN ... NaN NaN
2 NaN NaN ... NaN NaN
... ... ... ... ... ... ...
N 1 NaN NaN ... NaN NaN
2 NaN NaN ... NaN NaN

100 rows × 180 columns

19 1 outputs.

HIDDEN and EMPTY are used for visual verification of the technique.

    # placeholder text for the cells that stand in for truncated data (HIDDEN) and
    # for the filler cell written when both axes are named (EMPTY).
    HIDDEN, EMPTY = "hidden",  "empty"
    # the description is passed by keyword: as the second positional argument it was
    # bound to get_table's ARIA parameter and the caption stayed empty.
    with options:
        display(get_table(wide, caption="a smaller table representation with hidden rows and columns."))
1 outputs.
J K L 10 A A hidden Z Z
100 M M hidden O O
1000 1 2 hidden 2 3
A M 1 empty nan nan hidden nan nan
A M 2 empty nan nan hidden nan nan
hidden hidden hidden empty hidden hidden hidden hidden hidden
A N 1 empty nan nan hidden nan nan
A N 2 empty nan nan hidden nan nan
    # render the same frame with display limits of ten rows and ten columns.
    with pandas.option_context("display.max_rows", 10, "display.max_columns", 10):
        display(get_table(wide))
1 outputs.
J K L 10 A A A A A hidden Z Z Z Z Z
100 M M M N N hidden N N O O O
1000 1 2 3 1 2 hidden 2 3 1 2 3
A M 1 empty nan nan nan nan nan hidden nan nan nan nan nan
A M 2 empty nan nan nan nan nan hidden nan nan nan nan nan
A M 3 empty nan nan nan nan nan hidden nan nan nan nan nan
A N 1 empty nan nan nan nan nan hidden nan nan nan nan nan
A N 2 empty nan nan nan nan nan hidden nan nan nan nan nan
hidden hidden hidden empty hidden hidden hidden hidden hidden hidden hidden hidden hidden hidden hidden
A M 1 empty nan nan nan nan nan hidden nan nan nan nan nan
A M 2 empty nan nan nan nan nan hidden nan nan nan nan nan
A M 3 empty nan nan nan nan nan hidden nan nan nan nan nan
A N 1 empty nan nan nan nan nan hidden nan nan nan nan nan
A N 2 empty nan nan nan nan nan hidden nan nan nan nan nan



an array can be a special case of tables where headings aren't needed. a table with no row or column headings will reveal a nice array.


this example is a most complex axis case and does not include grouping.