# adding aria to long or wide tables

sometimes tables are large and truncated by default to save screen and disc real estate.
when this happens, we break the meaning of the columns/row ordering to assistive technology.
we need to supplement the table elements with aria to ensure an accessible experience.

we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.

# adding aria to long or wide tables

sometimes tables are large and truncated by default to save screen and disc real estate.
when this happens, we break the meaning of the columns/row ordering to assistive technology.
we need to supplement the table elements with aria to ensure an accessible experience.

we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.


        {'data': {'text/html': "adding aria to long or wide tables
\nsometimes tables are large and truncated by default to save screen and disc real estate.\nwhen this happens, we break the meaning of the columns/row ordering to assistive technology.\nwe need to supplement the table elements with aria to ensure an accessible experience.
\nwe'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.\n"}}

adding aria to long or wide tables

sometimes tables are large and truncated by default to save screen and disc real estate. when this happens, we break the meaning of the columns/row ordering to assistive technology. we need to supplement the table elements with aria to ensure an accessible experience.

we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.

    import pandas, bs4, enum, numpy, midgy, functools
    # register `str` as the text/html formatter for `bs4.BeautifulSoup` objects, so a displayed soup is shown as its markup.
    # NOTE(review): the trailing `;` presumably keeps the notebook from echoing the value returned by `for_type` - confirm.
    get_ipython().display_formatter.formatters["text/html"].for_type(bs4.BeautifulSoup, str);

    import pandas, bs4, enum, numpy, midgy, functools
    # register `str` as the text/html formatter for `bs4.BeautifulSoup` objects, so a displayed soup is shown as its markup.
    # NOTE(review): the trailing `;` presumably keeps the notebook from echoing the value returned by `for_type` - confirm.
    get_ipython().display_formatter.formatters["text/html"].for_type(bs4.BeautifulSoup, str);

/tmp/ipykernel_18771/1227099998.py:1: DeprecationWarning: 
Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas, bs4, enum, numpy, midgy, functools

%%
<style>
:is(.jp-OutputArea-output.jp-RenderedHTMLCommon, .nb-outputs) :is(td,th) {
    border: 1px solid;
}
</style>

%%
<style>
:is(.jp-OutputArea-output.jp-RenderedHTMLCommon, .nb-outputs) :is(td,th) {
    border: 1px solid;
}
</style>

get an aria marked up, non-uniform table.

    def get_table(df, ARIA=True, caption=None):
        """build an html table for `df` with aria markup that honors the pandas truncation options.

        `df` is the dataframe to render. when `ARIA` is truthy the table always receives
        `aria-colcount` and `aria-rowcount`; otherwise each count is only added when the
        frame is truncated along that axis. `caption` is optional text placed in the
        `<caption>` ahead of the summary generated by `get_caption`.
        returns the `bs4.BeautifulSoup` document that holds the table.
        """
        soup = bs4.BeautifulSoup(features="lxml")
        max_columns = pandas.options.display.max_columns
        max_rows = pandas.options.display.max_rows
        # pandas documents `None` as unlimited and `0` as terminal auto-detection;
        # neither is a usable bound here, so both mean "do not truncate".
        WIDE = bool(max_columns) and (df.shape[1] + 1) > max_columns
        LONG = bool(max_rows) and (df.shape[0] + 1) > max_rows
        col_ranges, row_ranges = get_ranges(df, WIDE, LONG)
        table = new(
            "table",
            colcount=row_major_at_cols(df) if ARIA or WIDE else None,
            rowcount=row_major_at_rows(df) if ARIA or LONG else None,
        )
        soup.append(table)
        cap = new("caption", caption)
        table.append(cap)
        cap.append(get_caption(df))
        get_thead(df, table, col_ranges, WIDE, ARIA, LONG)
        get_tbody(df, table, col_ranges, row_ranges, WIDE, ARIA, LONG)
        return soup

    def get_table(df, ARIA=True, caption=None):
        """build an html table for `df` with aria markup that honors the pandas truncation options.

        `df` is the dataframe to render. when `ARIA` is truthy the table always receives
        `aria-colcount` and `aria-rowcount`; otherwise each count is only added when the
        frame is truncated along that axis. `caption` is optional text placed in the
        `<caption>` ahead of the summary generated by `get_caption`.
        returns the `bs4.BeautifulSoup` document that holds the table.
        """
        soup = bs4.BeautifulSoup(features="lxml")
        max_columns = pandas.options.display.max_columns
        max_rows = pandas.options.display.max_rows
        # pandas documents `None` as unlimited and `0` as terminal auto-detection;
        # neither is a usable bound here, so both mean "do not truncate".
        WIDE = bool(max_columns) and (df.shape[1] + 1) > max_columns
        LONG = bool(max_rows) and (df.shape[0] + 1) > max_rows
        col_ranges, row_ranges = get_ranges(df, WIDE, LONG)
        table = new(
            "table",
            colcount=row_major_at_cols(df) if ARIA or WIDE else None,
            rowcount=row_major_at_rows(df) if ARIA or LONG else None,
        )
        soup.append(table)
        cap = new("caption", caption)
        table.append(cap)
        cap.append(get_caption(df))
        get_thead(df, table, col_ranges, WIDE, ARIA, LONG)
        get_tbody(df, table, col_ranges, row_ranges, WIDE, ARIA, LONG)
        return soup

    def get_thead(df, table, col_ranges, WIDE=False, ARIA=False, LONG=False):
        """append one heading row per column level of `df` to `table`.

        `col_ranges` is the pair of leading and trailing column ranges from `get_ranges`.
        when columns are skipped between the two ranges a single `HIDDEN` heading stands in
        for them and records how many it replaces in `aria-colspan`.
        `ARIA` adds `aria-rowindex` and `aria-colindex` to every heading; without it a row is
        indexed only when `LONG` is set and a column heading only in the trailing range of a
        `WIDE` table. `table` is modified in place and nothing is returned.
        """
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of columns skipped between the two ranges; 0 means no column is hidden.
        col_center = col_ranges[1].start - col_ranges[0].stop
        # data columns follow the index headings plus one extra column when both axes are named.
        col_offset = df.index.nlevels + int(ROWS and COLS)
        for col_level, col_name in enumerate(df.columns.names):
            # heading rows are never part of the truncated tail, so only the flags decide the index.
            table.append(tr := trow(rowindex=col_level+1 if ARIA or LONG else None))
            if not col_level and (ROWS or not COLS):
                for row_level, row_name in enumerate(df.index.names):
                    # str(None) is the truthy "None", so a missing name is tested before the fallback.
                    row_label = "" if row_name is None else str(row_name)
                    tr.append(theading(
                        row_label or F"index {row_level}", scope="col",
                        rowspan=df.columns.nlevels if df.columns.nlevels > 1 else None,
                        colindex=row_level+1 if ARIA else None))
            if COLS:
                col_label = "" if col_name is None else str(col_name)
                tr.append(theading(
                    col_label or F"level {col_level}", scope="row",
                    colindex=df.index.nlevels+1 if ARIA else None))
            level_values = df.columns.get_level_values(col_level)
            for col_part, col_range in enumerate(col_ranges):
                if col_part and col_center:
                    # the marker sits right after the last leading column; deriving the position
                    # from the range keeps it defined even when the leading range is empty.
                    tr.append(theading(
                        HIDDEN, colindex=col_ranges[0].stop + 1 + col_offset if ARIA else None,
                        **{"aria-colspan": col_center}))
                for col_index in col_range:
                    tr.append(theading(
                        str(level_values[col_index]), scope="col",
                        colindex=col_offset + col_index + 1 if ARIA or WIDE and col_part else None))

    def get_thead(df, table, col_ranges, WIDE=False, ARIA=False, LONG=False):
        """append one heading row per column level of `df` to `table`.

        `col_ranges` is the pair of leading and trailing column ranges from `get_ranges`.
        when columns are skipped between the two ranges a single `HIDDEN` heading stands in
        for them and records how many it replaces in `aria-colspan`.
        `ARIA` adds `aria-rowindex` and `aria-colindex` to every heading; without it a row is
        indexed only when `LONG` is set and a column heading only in the trailing range of a
        `WIDE` table. `table` is modified in place and nothing is returned.
        """
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of columns skipped between the two ranges; 0 means no column is hidden.
        col_center = col_ranges[1].start - col_ranges[0].stop
        # data columns follow the index headings plus one extra column when both axes are named.
        col_offset = df.index.nlevels + int(ROWS and COLS)
        for col_level, col_name in enumerate(df.columns.names):
            # heading rows are never part of the truncated tail, so only the flags decide the index.
            table.append(tr := trow(rowindex=col_level+1 if ARIA or LONG else None))
            if not col_level and (ROWS or not COLS):
                for row_level, row_name in enumerate(df.index.names):
                    # str(None) is the truthy "None", so a missing name is tested before the fallback.
                    row_label = "" if row_name is None else str(row_name)
                    tr.append(theading(
                        row_label or F"index {row_level}", scope="col",
                        rowspan=df.columns.nlevels if df.columns.nlevels > 1 else None,
                        colindex=row_level+1 if ARIA else None))
            if COLS:
                col_label = "" if col_name is None else str(col_name)
                tr.append(theading(
                    col_label or F"level {col_level}", scope="row",
                    colindex=df.index.nlevels+1 if ARIA else None))
            level_values = df.columns.get_level_values(col_level)
            for col_part, col_range in enumerate(col_ranges):
                if col_part and col_center:
                    # the marker sits right after the last leading column; deriving the position
                    # from the range keeps it defined even when the leading range is empty.
                    tr.append(theading(
                        HIDDEN, colindex=col_ranges[0].stop + 1 + col_offset if ARIA else None,
                        **{"aria-colspan": col_center}))
                for col_index in col_range:
                    tr.append(theading(
                        str(level_values[col_index]), scope="col",
                        colindex=col_offset + col_index + 1 if ARIA or WIDE and col_part else None))

one of the confusions that will be encountered is that 
natural row and column indexing of tidy data frames will be inconsistent with those announced by assistive technology.
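assistive technology indexes tables starting from 1 and counts the heading rows and index columns, while a dataframe's positions start from 0 and count only the data.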

one of the confusions that will be encountered is that 
natural row and column indexing of tidy data frames will be inconsistent with those announced by assistive technology.
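assistive technology indexes tables starting from 1 and counts the heading rows and index columns, while a dataframe's positions start from 0 and count only the data.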


        {'data': {'text/html': 'one of the confusions that will be encountered is that\nnatural row and column indexing of tidy data frames will be inconsistent with those announced by assistive technology.\nassistive technology indexes tables starting from\n'}}

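one of the confusions that will be encountered is that natural row and column indexing of tidy data frames will be inconsistent with those announced by assistive technology. assistive technology indexes tables starting from 1 and counts the heading rows and index columns, while a dataframe's positions start from 0 and count only the data.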

    def row_major_at_rows(df):
        """return the number of table rows for `df`: one per column level plus one per data row."""
        heading_rows = df.columns.nlevels
        data_rows = len(df)
        return heading_rows + data_rows
    def row_major_at_cols(df):
        """return the number of table columns for `df`.

        counts one column per index level, one more when any column level has a truthy name,
        and one per data column.
        """
        index_columns = df.index.nlevels
        names_column = int(any(df.columns.names))
        data_columns = len(df.columns)
        return index_columns + names_column + data_columns

    def row_major_at_rows(df):
        """return the number of table rows for `df`: one per column level plus one per data row."""
        heading_rows = df.columns.nlevels
        data_rows = len(df)
        return heading_rows + data_rows
    def row_major_at_cols(df):
        """return the number of table columns for `df`.

        counts one column per index level, one more when any column level has a truthy name,
        and one per data column.
        """
        index_columns = df.index.nlevels
        names_column = int(any(df.columns.names))
        data_columns = len(df.columns)
        return index_columns + names_column + data_columns

we present a potential solution: provide more information in the caption that may alert screen reader users to the mismatch.

we present a potential solution: provide more information in the caption that may alert screen reader users to the mismatch.


        {'data': {'text/html': 'we present a potentational solution provide more information in the caption that may alert screen readers to the mismatch.\n'}}

we present a potential solution: provide more information in the caption that may alert screen reader users to the mismatch.

    def get_caption(df):
        """summarize the shape of `df` as a description list for the table caption.

        the outer list reports the number of data rows and columns; a nested list under
        "indexes:" reports how many levels the row and column indexes have.
        both lists carry `role="presentation"`. returns the outer `dl` tag.
        """
        summary = new("dl", role="presentation")
        for term, total in (("rows", len(df)), ("columns", len(df.columns))):
            summary.append(new("dt", term))
            summary.append(new("dd", str(total)))
        # the nested list is attached while still empty and filled in afterwards.
        levels = new("dl", role="presentation")
        summary.append(new("dt", "indexes:"))
        summary.append(new("dd", levels))
        for term, depth in (("rows", df.index.nlevels), ("columns", df.columns.nlevels)):
            levels.append(new("dt", term))
            levels.append(new("dd", str(depth)))
        return summary

    def get_caption(df):
        """summarize the shape of `df` as a description list for the table caption.

        the outer list reports the number of data rows and columns; a nested list under
        "indexes:" reports how many levels the row and column indexes have.
        both lists carry `role="presentation"`. returns the outer `dl` tag.
        """
        summary = new("dl", role="presentation")
        for term, total in (("rows", len(df)), ("columns", len(df.columns))):
            summary.append(new("dt", term))
            summary.append(new("dd", str(total)))
        # the nested list is attached while still empty and filled in afterwards.
        levels = new("dl", role="presentation")
        summary.append(new("dt", "indexes:"))
        summary.append(new("dd", levels))
        for term, depth in (("rows", df.index.nlevels), ("columns", df.columns.nlevels)):
            levels.append(new("dt", term))
            levels.append(new("dd", str(depth)))
        return summary

iterate through the rows of data and append them to the table

    def get_tbody(df, table, col_ranges, row_ranges, WIDE=False, ARIA=False, LONG=False):
        """append the visible rows of `df`, and markers for the truncated ones, to `table`.

        `col_ranges` and `row_ranges` are the pairs of leading and trailing ranges from
        `get_ranges`. rows skipped between the two row ranges are replaced by one row of
        `HIDDEN` cells carrying `aria-rowspan`; columns skipped between the two column
        ranges are replaced by one `HIDDEN` cell per row carrying `aria-colspan`.
        `ARIA` controls whether the row headings of data rows receive `aria-colindex`;
        the other cells are always indexed. `WIDE` and `LONG` are accepted but not read here.
        `table` is modified in place and nothing is returned.
        """
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of rows/columns skipped between the two ranges; 0 means nothing is hidden.
        row_center = row_ranges[1].start - row_ranges[0].stop
        col_center = col_ranges[1].start - col_ranges[0].stop
        # the EMPTY spacer cell sits right after the row headings.
        spacer_index = df.index.nlevels + 1
        # data columns follow the row headings plus the spacer when both axes are named.
        col_offset = df.index.nlevels + int(ROWS and COLS)
        # markers take the position right after the last leading row/column; deriving them from
        # the ranges avoids relying on loop variables that are unbound for an empty leading range.
        hidden_row_index = row_ranges[0].stop + 1 + df.columns.nlevels
        hidden_col_index = col_ranges[0].stop + 1 + col_offset
        for row_part, row_range in enumerate(row_ranges):
            if row_part and row_center:
                table.append(tr := trow(rowindex=hidden_row_index, **{"aria-rowspan": row_center}))
                for row_level in range(df.index.nlevels):
                    tr.append(theading(HIDDEN, colindex=row_level+1))
                if ROWS and COLS:
                    tr.append(tdata(EMPTY, colindex=spacer_index))
                for col_part, col_range in enumerate(col_ranges):
                    if col_part and col_center:
                        tr.append(tdata(
                            HIDDEN, colindex=hidden_col_index,
                            **{"aria-rowspan": row_center, "aria-colspan": col_center}))
                    for col_index in col_range:
                        tr.append(tdata(HIDDEN, colindex=col_index + 1 + col_offset))
            for row_index in row_range:
                table.append(tr := trow(rowindex=row_index+1+df.columns.nlevels))
                for row_level in range(df.index.nlevels):
                    tr.append(theading(
                        str(df.index.get_level_values(row_level)[row_index]),
                        colindex=row_level+1 if ARIA else None, scope="row"))
                if ROWS and COLS:
                    tr.append(tdata(EMPTY, colindex=spacer_index))
                for col_part, col_range in enumerate(col_ranges):
                    if col_part and col_center:
                        tr.append(tdata(
                            HIDDEN, colindex=hidden_col_index, **{"aria-colspan": col_center}))
                    for col_index in col_range:
                        tr.append(tdata(
                            str(df.iloc[row_index, col_index]),
                            colindex=col_index + 1 + col_offset))

    def get_tbody(df, table, col_ranges, row_ranges, WIDE=False, ARIA=False, LONG=False):
        """append the visible rows of `df`, and markers for the truncated ones, to `table`.

        `col_ranges` and `row_ranges` are the pairs of leading and trailing ranges from
        `get_ranges`. rows skipped between the two row ranges are replaced by one row of
        `HIDDEN` cells carrying `aria-rowspan`; columns skipped between the two column
        ranges are replaced by one `HIDDEN` cell per row carrying `aria-colspan`.
        `ARIA` controls whether the row headings of data rows receive `aria-colindex`;
        the other cells are always indexed. `WIDE` and `LONG` are accepted but not read here.
        `table` is modified in place and nothing is returned.
        """
        ROWS, COLS = any(df.index.names), any(df.columns.names)
        # number of rows/columns skipped between the two ranges; 0 means nothing is hidden.
        row_center = row_ranges[1].start - row_ranges[0].stop
        col_center = col_ranges[1].start - col_ranges[0].stop
        # the EMPTY spacer cell sits right after the row headings.
        spacer_index = df.index.nlevels + 1
        # data columns follow the row headings plus the spacer when both axes are named.
        col_offset = df.index.nlevels + int(ROWS and COLS)
        # markers take the position right after the last leading row/column; deriving them from
        # the ranges avoids relying on loop variables that are unbound for an empty leading range.
        hidden_row_index = row_ranges[0].stop + 1 + df.columns.nlevels
        hidden_col_index = col_ranges[0].stop + 1 + col_offset
        for row_part, row_range in enumerate(row_ranges):
            if row_part and row_center:
                table.append(tr := trow(rowindex=hidden_row_index, **{"aria-rowspan": row_center}))
                for row_level in range(df.index.nlevels):
                    tr.append(theading(HIDDEN, colindex=row_level+1))
                if ROWS and COLS:
                    tr.append(tdata(EMPTY, colindex=spacer_index))
                for col_part, col_range in enumerate(col_ranges):
                    if col_part and col_center:
                        tr.append(tdata(
                            HIDDEN, colindex=hidden_col_index,
                            **{"aria-rowspan": row_center, "aria-colspan": col_center}))
                    for col_index in col_range:
                        tr.append(tdata(HIDDEN, colindex=col_index + 1 + col_offset))
            for row_index in row_range:
                table.append(tr := trow(rowindex=row_index+1+df.columns.nlevels))
                for row_level in range(df.index.nlevels):
                    tr.append(theading(
                        str(df.index.get_level_values(row_level)[row_index]),
                        colindex=row_level+1 if ARIA else None, scope="row"))
                if ROWS and COLS:
                    tr.append(tdata(EMPTY, colindex=spacer_index))
                for col_part, col_range in enumerate(col_ranges):
                    if col_part and col_center:
                        tr.append(tdata(
                            HIDDEN, colindex=hidden_col_index, **{"aria-colspan": col_center}))
                    for col_index in col_range:
                        tr.append(tdata(
                            str(df.iloc[row_index, col_index]),
                            colindex=col_index + 1 + col_offset))

`pandas.options.display.max_columns, pandas.options.display.max_rows` determine the ranges of values that are presented to the visitor.
this work implements special logic for spanning multiple rows and columns for truncated data.

`pandas.options.display.max_columns, pandas.options.display.max_rows` determine the ranges of values that are presented to the visitor.
this work implements special logic for spanning multiple rows and columns for truncated data.


        {'data': {'text/html': 'pandas.options.display.max_columns, pandas.options.display.max_rows determine the ranges of values that are presented to the visitor.\nthis work implements special logic for spanning multiple rows and columns for truncated data.\n'}}

pandas.options.display.max_columns, pandas.options.display.max_rows determine the ranges of values that are presented to the visitor. this work implements special logic for spanning multiple rows and columns for truncated data.

    def get_frame_bounds(df, WIDE=False, LONG=False):
        """return where the leading columns/rows stop and where the trailing ones start.

        the result is `(col_head, col_tail, row_head, row_tail)`. without truncation both
        values of an axis equal its length. with `WIDE` / `LONG` the head is half of the
        matching pandas display option and the tail starts that many entries before the end.
        """
        n_rows, n_cols = len(df), len(df.columns)
        col_head = col_tail = n_cols
        row_head = row_tail = n_rows
        if WIDE:
            col_head = pandas.options.display.max_columns // 2
            col_tail = n_cols - col_head
        if LONG:
            row_head = pandas.options.display.max_rows // 2
            row_tail = n_rows - row_head
        return col_head, col_tail, row_head, row_tail
    def get_ranges(df, WIDE=False, LONG=False):
        """return `(column ranges, row ranges)`, each a pair of leading and trailing `range` objects.

        the bounds come from `get_frame_bounds`; the trailing range always ends at the
        length of its axis.
        """
        col_head, col_tail, row_head, row_tail = get_frame_bounds(df, WIDE=WIDE, LONG=LONG)
        n_rows, n_cols = df.shape[0], df.shape[1]
        columns = (range(col_head), range(col_tail, n_cols))
        rows = (range(row_head), range(row_tail, n_rows))
        return columns, rows

    def get_frame_bounds(df, WIDE=False, LONG=False):
        """return where the leading columns/rows stop and where the trailing ones start.

        the result is `(col_head, col_tail, row_head, row_tail)`. without truncation both
        values of an axis equal its length. with `WIDE` / `LONG` the head is half of the
        matching pandas display option and the tail starts that many entries before the end.
        """
        n_rows, n_cols = len(df), len(df.columns)
        col_head = col_tail = n_cols
        row_head = row_tail = n_rows
        if WIDE:
            col_head = pandas.options.display.max_columns // 2
            col_tail = n_cols - col_head
        if LONG:
            row_head = pandas.options.display.max_rows // 2
            row_tail = n_rows - row_head
        return col_head, col_tail, row_head, row_tail
    def get_ranges(df, WIDE=False, LONG=False):
        """return `(column ranges, row ranges)`, each a pair of leading and trailing `range` objects.

        the bounds come from `get_frame_bounds`; the trailing range always ends at the
        length of its axis.
        """
        col_head, col_tail, row_head, row_tail = get_frame_bounds(df, WIDE=WIDE, LONG=LONG)
        n_rows, n_cols = df.shape[0], df.shape[1]
        columns = (range(col_head), range(col_tail, n_cols))
        rows = (range(row_head), range(row_tail, n_rows))
        return columns, rows

to provide the proper mark up for large tables we need to be rigorous about use of ARIA: `rowindex` `colindex` `rowspan` `colspan`.

to provide the proper mark up for large tables we need to be rigorous about use of ARIA: `rowindex` `colindex` `rowspan` `colspan`.


        {'data': {'text/html': 'to provide the proper mark up for large tables we need to be rigorous about use of ARIA: rowindex colindex rowspan colspan.\n'}}

to provide the proper mark up for large tables we need to be rigorous about use of ARIA: rowindex colindex rowspan colspan .

    def new(
        tag,
        string=None,
        rowindex=None,
        colindex=None,
        rowcount=None,
        colcount=None,
        rowspan=None,
        colspan=None,
        scope=None,
        *,
        soup=bs4.BeautifulSoup(features="lxml"),
        **attrs,
    ):
        """create a new beautiful soup tag with table and aria properties.

        truthy `rowindex`, `colindex`, `rowcount` and `colcount` are stored as `aria-` prefixed
        attributes; truthy `rowspan`, `colspan` and `scope` are stored under their own names.
        any extra keyword in `attrs` is passed through as an attribute. a truthy `string`
        is appended as the tag's content. `soup` is only used to call `new_tag`; its default
        is a single document created when the function is defined.
        """
        aria_values = {
            "rowindex": rowindex,
            "colindex": colindex,
            "rowcount": rowcount,
            "colcount": colcount,
        }
        plain_values = {"rowspan": rowspan, "colspan": colspan, "scope": scope}
        for key, value in aria_values.items():
            if value:
                attrs[F"aria-{key}"] = value
        for key, value in plain_values.items():
            if value:
                attrs[key] = value
        element = soup.new_tag(tag, attrs=attrs)
        if string:
            element.append(string)
        return element
    # shorthands that fix the tag name for the three table elements used throughout.
    trow, theading, tdata = (functools.partial(new, name) for name in ("tr", "th", "td"))

    def new(
        tag,
        string=None,
        rowindex=None,
        colindex=None,
        rowcount=None,
        colcount=None,
        rowspan=None,
        colspan=None,
        scope=None,
        *,
        soup=bs4.BeautifulSoup(features="lxml"),
        **attrs,
    ):
        """create a new beautiful soup tag with table and aria properties.

        truthy `rowindex`, `colindex`, `rowcount` and `colcount` are stored as `aria-` prefixed
        attributes; truthy `rowspan`, `colspan` and `scope` are stored under their own names.
        any extra keyword in `attrs` is passed through as an attribute. a truthy `string`
        is appended as the tag's content. `soup` is only used to call `new_tag`; its default
        is a single document created when the function is defined.
        """
        aria_values = {
            "rowindex": rowindex,
            "colindex": colindex,
            "rowcount": rowcount,
            "colcount": colcount,
        }
        plain_values = {"rowspan": rowspan, "colspan": colspan, "scope": scope}
        for key, value in aria_values.items():
            if value:
                attrs[F"aria-{key}"] = value
        for key, value in plain_values.items():
            if value:
                attrs[key] = value
        element = soup.new_tag(tag, attrs=attrs)
        if string:
            element.append(string)
        return element
    # shorthands that fix the tag name for the three table elements used throughout.
    trow, theading, tdata = (functools.partial(new, name) for name in ("tr", "th", "td"))

sample data

    # a three-level index built from the product 2 x 3 x 3 (18 labels), with the levels named J, K and L.
    index = pandas.MultiIndex.from_product([["A", "Z"], ["M", "N", "O"], [1, 2, 3]], names=[*"JKL"])
    # an empty frame with that index on both axes; the column levels are renamed 10, 100, 1000 and `head()` keeps the first rows.
    (df := pandas.DataFrame(columns=index, index=index).rename_axis(columns=[10, 100, 1000]).head())
    # the same frame with levels 0 and 1 dropped from each axis and the remaining axis names removed.
    single = df.droplevel((0, 1), 0).droplevel((0, 1), 1).rename_axis(None, axis=1).rename_axis(None, axis=0)
    # repeat the frame ten times across the columns, then twenty times down the rows.
    wide = pandas.concat([pandas.concat([df]*10, axis=1)]*20)
    # the option context is bound to `options` while it is entered, limiting the display to 4 rows and 4 columns.
    with (options := pandas.option_context("display.max_rows", 4, "display.max_columns", 4)):
        display(wide)

    # a three-level index built from the product 2 x 3 x 3 (18 labels), with the levels named J, K and L.
    index = pandas.MultiIndex.from_product([["A", "Z"], ["M", "N", "O"], [1, 2, 3]], names=[*"JKL"])
    # an empty frame with that index on both axes; the column levels are renamed 10, 100, 1000 and `head()` keeps the first rows.
    (df := pandas.DataFrame(columns=index, index=index).rename_axis(columns=[10, 100, 1000]).head())
    # the same frame with levels 0 and 1 dropped from each axis and the remaining axis names removed.
    single = df.droplevel((0, 1), 0).droplevel((0, 1), 1).rename_axis(None, axis=1).rename_axis(None, axis=0)
    # repeat the frame ten times across the columns, then twenty times down the rows.
    wide = pandas.concat([pandas.concat([df]*10, axis=1)]*20)
    # the option context is bound to `options` while it is entered, limiting the display to 4 rows and 4 columns.
    with (options := pandas.option_context("display.max_rows", 4, "display.max_columns", 4)):
        display(wide)

		10	A		...	Z
		100	M		...	O
		1000	1	2	...	2	3
J	K	L
A	M	1	NaN	NaN	...	NaN	NaN
	M	2	NaN	NaN	...	NaN	NaN
	...	...	...	...	...	...	...
	N	1	NaN	NaN	...	NaN	NaN
	N	2	NaN	NaN	...	NaN	NaN

100 rows × 180 columns

%%
<style>
/**inline flex the dl block display and force it cause jupyter is aggressive.**/
table>caption dl {
    display: inline-flex !important;
    justify-content: flex-start;
}
/** add punctuation after each of the descriptors. **/
table>caption dl>dd {
    &::after {content: ", "}; &:last-child::after {content: " "};
}
/**unset some jupyter nonsense**/
dl > dt, dl > dd {
    width: unset !important;
    float: unset !important;
    padding-right: 1rem !important;
}
</style>

%%
<style>
/**inline flex the dl block display and force it cause jupyter is aggressive.**/
table>caption dl {
    display: inline-flex !important;
    justify-content: flex-start;
}
/** add punctuation after each of the descriptors. **/
table>caption dl>dd {
    &::after {content: ", "}; &:last-child::after {content: " "};
}
/**unset some jupyter nonsense**/
dl > dt, dl > dd {
    width: unset !important;
    float: unset !important;
    padding-right: 1rem !important;
}
</style>

HIDDEN and EMPTY are used for visual verification of the technique.

    # visible placeholder text: HIDDEN marks cells standing in for truncated data, EMPTY marks the spacer cell.
    HIDDEN = "hidden"
    EMPTY = "empty"

    # visible placeholder text: HIDDEN marks cells standing in for truncated data, EMPTY marks the spacer cell.
    HIDDEN = "hidden"
    EMPTY = "empty"

    # the caption has to be passed by keyword: the second positional parameter of get_table is ARIA,
    # so a positional string would only switch ARIA on and leave the caption empty.
    with options:
        display(get_table(wide, caption="a smaller table representation with hidden rows and columns."))

    # the caption has to be passed by keyword: the second positional parameter of get_table is ARIA,
    # so a positional string would only switch ARIA on and leave the caption empty.
    with options:
        display(get_table(wide, caption="a smaller table representation with hidden rows and columns."))

rows

100

columns

180

indexes:

rows

3

columns

3
J	K	L	10	A	A	hidden	Z	Z
			100	M	M	hidden	O	O
			1000	1	2	hidden	2	3
A	M	1	empty	nan	nan	hidden	nan	nan
A	M	2	empty	nan	nan	hidden	nan	nan
hidden	hidden	hidden	empty	hidden	hidden	hidden	hidden	hidden
A	N	1	empty	nan	nan	hidden	nan	nan
A	N	2	empty	nan	nan	hidden	nan	nan

    # render the same frame with the display limits raised to ten rows and ten columns.
    with pandas.option_context(
        "display.max_rows", 10,
        "display.max_columns", 10,
    ):
        display(get_table(wide))

    # render the same frame with the display limits raised to ten rows and ten columns.
    with pandas.option_context(
        "display.max_rows", 10,
        "display.max_columns", 10,
    ):
        display(get_table(wide))

rows

100

columns

180

indexes:

rows

3

columns

3
J	K	L	10	A	A	A	A	A	hidden	Z	Z	Z	Z	Z
			100	M	M	M	N	N	hidden	N	N	O	O	O
			1000	1	2	3	1	2	hidden	2	3	1	2	3
A	M	1	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	M	2	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	M	3	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	N	1	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	N	2	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
hidden	hidden	hidden	empty	hidden	hidden	hidden	hidden	hidden	hidden	hidden	hidden	hidden	hidden	hidden
A	M	1	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	M	2	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	M	3	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	N	1	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan
A	N	2	empty	nan	nan	nan	nan	nan	hidden	nan	nan	nan	nan	nan

notes/discussion

an array may be a special case for tables where headings aren't needed. a table with no row or column headings will reveal a nice array.

an array may be a special case for tables where headings aren't needed. a table with no row or column headings will reveal a nice array.


        {'data': {'text/html': "an array be a special case for tables where headings aren't needed. a table with no rows or columns headings will reveal a nice array.\n"}}

an array may be a special case for tables where headings aren't needed. a table with no row or column headings will reveal a nice array.

this example is a most complex axis case and does not include grouping.

this example is a most complex axis case and does not include grouping.

this example is a most complex axis case and does not include grouping.

	index	execution_count	cell_type	source	outputs	metadata	toolbar	loc
code
markdown
raw

	cell	source	outputs
code
markdown
raw